[fix](json) fix json int128 overflow (#22917)
* support int128 in jsonb
* fix jsonb int128 write
* fix jsonb to json int128
* fix json functions for int128
* add nereids function jsonb_extract_largeint
* add testcase for json int128
* change docs for json int128
* add nereids function jsonb_extract_largeint
* clang format
* fix check style
* using int128_t = __int128_t for all int128
* use fmt::format_to instead of snprintf digit by digit for int128
* clang format
* delete useless check
* add warn log
* clang format
This commit is contained in:
@ -17,6 +17,8 @@
|
||||
|
||||
#include "runtime/jsonb_value.h"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include "util/jsonb_error.h"
|
||||
@ -30,8 +32,10 @@ Status JsonBinaryValue::from_json_string(const char* s, int length) {
|
||||
JsonbErrType error = JsonbErrType::E_NONE;
|
||||
if (!parser.parse(s, length)) {
|
||||
error = parser.getErrorCode();
|
||||
return Status::InvalidArgument("json parse error: {} for value: {}",
|
||||
JsonbErrMsg::getErrMsg(error), std::string_view(s, length));
|
||||
auto msg = fmt::format("json parse error: {} for value: {}", JsonbErrMsg::getErrMsg(error),
|
||||
std::string_view(s, length));
|
||||
LOG(WARNING) << msg;
|
||||
return Status::InvalidArgument(msg);
|
||||
}
|
||||
|
||||
ptr = parser.getWriter().getOutput()->getBuffer();
|
||||
|
||||
@ -91,6 +91,8 @@ namespace doris {
|
||||
|
||||
#define JSONB_VER 1
|
||||
|
||||
using int128_t = __int128;
|
||||
|
||||
// forward declaration
|
||||
class JsonbValue;
|
||||
class ObjectVal;
|
||||
@ -517,6 +519,8 @@ public:
|
||||
return "int";
|
||||
case JsonbType::T_Int64:
|
||||
return "bigint";
|
||||
case JsonbType::T_Int128:
|
||||
return "largeint";
|
||||
case JsonbType::T_Double:
|
||||
return "double";
|
||||
case JsonbType::T_Float:
|
||||
@ -624,11 +628,11 @@ inline bool JsonbInt64Val::setVal(int64_t value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef NumberValT<__int128_t> JsonbInt128Val;
|
||||
typedef NumberValT<int128_t> JsonbInt128Val;
|
||||
|
||||
// override setVal for Int64Val
|
||||
// override setVal for Int128Val
|
||||
template <>
|
||||
inline bool JsonbInt128Val::setVal(__int128_t value) {
|
||||
inline bool JsonbInt128Val::setVal(int128_t value) {
|
||||
if (!isInt128()) {
|
||||
return false;
|
||||
}
|
||||
@ -666,7 +670,7 @@ inline bool JsonbFloatVal::setVal(float value) {
|
||||
// A class to get an integer
|
||||
class JsonbIntVal : public JsonbValue {
|
||||
public:
|
||||
int64_t val() const {
|
||||
int128_t val() const {
|
||||
switch (type_) {
|
||||
case JsonbType::T_Int8:
|
||||
return ((JsonbInt8Val*)this)->val();
|
||||
@ -676,11 +680,13 @@ public:
|
||||
return ((JsonbInt32Val*)this)->val();
|
||||
case JsonbType::T_Int64:
|
||||
return ((JsonbInt64Val*)this)->val();
|
||||
case JsonbType::T_Int128:
|
||||
return ((JsonbInt128Val*)this)->val();
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
bool setVal(int64_t val) {
|
||||
bool setVal(int128_t val) {
|
||||
switch (type_) {
|
||||
case JsonbType::T_Int8:
|
||||
if (val < std::numeric_limits<int8_t>::min() ||
|
||||
@ -698,7 +704,9 @@ public:
|
||||
return false;
|
||||
return ((JsonbInt32Val*)this)->setVal((int32_t)val);
|
||||
case JsonbType::T_Int64:
|
||||
return ((JsonbInt64Val*)this)->setVal(val);
|
||||
return ((JsonbInt64Val*)this)->setVal((int64_t)val);
|
||||
case JsonbType::T_Int128:
|
||||
return ((JsonbInt128Val*)this)->setVal(val);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -1131,7 +1139,7 @@ inline unsigned int JsonbValue::numPackedBytes() const {
|
||||
return sizeof(type_) + sizeof(float);
|
||||
}
|
||||
case JsonbType::T_Int128: {
|
||||
return sizeof(type_) + sizeof(__int128_t);
|
||||
return sizeof(type_) + sizeof(int128_t);
|
||||
}
|
||||
case JsonbType::T_String:
|
||||
case JsonbType::T_Binary: {
|
||||
@ -1168,7 +1176,7 @@ inline unsigned int JsonbValue::size() const {
|
||||
return sizeof(float);
|
||||
}
|
||||
case JsonbType::T_Int128: {
|
||||
return sizeof(__int128_t);
|
||||
return sizeof(int128_t);
|
||||
}
|
||||
case JsonbType::T_String:
|
||||
case JsonbType::T_Binary: {
|
||||
|
||||
@ -71,6 +71,8 @@
|
||||
|
||||
namespace doris {
|
||||
|
||||
using int128_t = __int128;
|
||||
|
||||
/*
|
||||
* Template JsonbParserTSIMD
|
||||
*/
|
||||
@ -296,21 +298,22 @@ public:
|
||||
return;
|
||||
}
|
||||
} else if (num.is_int64() || num.is_uint64()) {
|
||||
if (num.is_uint64() && num.get_uint64() > std::numeric_limits<int64_t>::max()) {
|
||||
err_ = JsonbErrType::E_OCTAL_OVERFLOW;
|
||||
LOG(WARNING) << "overflow number: " << num.get_uint64();
|
||||
return;
|
||||
}
|
||||
int64_t val = num.is_int64() ? num.get_int64() : num.get_uint64();
|
||||
int128_t val = num.is_int64() ? (int128_t)num.get_int64() : (int128_t)num.get_uint64();
|
||||
int size = 0;
|
||||
if (val <= std::numeric_limits<int8_t>::max()) {
|
||||
if (val >= std::numeric_limits<int8_t>::min() &&
|
||||
val <= std::numeric_limits<int8_t>::max()) {
|
||||
size = writer_.writeInt8((int8_t)val);
|
||||
} else if (val <= std::numeric_limits<int16_t>::max()) {
|
||||
} else if (val >= std::numeric_limits<int16_t>::min() &&
|
||||
val <= std::numeric_limits<int16_t>::max()) {
|
||||
size = writer_.writeInt16((int16_t)val);
|
||||
} else if (val <= std::numeric_limits<int32_t>::max()) {
|
||||
} else if (val >= std::numeric_limits<int32_t>::min() &&
|
||||
val <= std::numeric_limits<int32_t>::max()) {
|
||||
size = writer_.writeInt32((int32_t)val);
|
||||
} else { // val <= INT64_MAX
|
||||
size = writer_.writeInt64(val);
|
||||
} else if (val >= std::numeric_limits<int64_t>::min() &&
|
||||
val <= std::numeric_limits<int64_t>::max()) {
|
||||
size = writer_.writeInt64((int64_t)val);
|
||||
} else { // INT128
|
||||
size = writer_.writeInt128(val);
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
|
||||
@ -34,16 +34,21 @@
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <fmt/format.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cinttypes>
|
||||
#include <iostream>
|
||||
|
||||
namespace doris {
|
||||
|
||||
using int128_t = __int128;
|
||||
|
||||
// lengths includes sign
|
||||
#define MAX_INT_DIGITS 11
|
||||
#define MAX_INT64_DIGITS 20
|
||||
#define MAX_INT128_DIGITS 40
|
||||
#define MAX_DOUBLE_DIGITS 23 // 1(sign)+16(significant)+1(decimal)+5(exponent)
|
||||
|
||||
/*
|
||||
@ -126,9 +131,15 @@ public:
|
||||
size_ += len;
|
||||
}
|
||||
|
||||
void write(__int128 l) {
|
||||
// TODO
|
||||
assert(false);
|
||||
// Appends the decimal text of the 128-bit integer `l` at head_ + size_ and
// advances size_ by the length reported by fmt::format_to_n.
void write(int128_t l) {
|
||||
// Ensure MAX_INT128_DIGITS + 1 more bytes fit past size_; otherwise ask
// realloc() for that many bytes.
// NOTE(review): the earlier wording justified the extra byte with snprintf's
// trailing NUL, but this path formats with fmt::format_to_n — confirm the
// spare byte is still needed.
|
||||
if (size_ + MAX_INT128_DIGITS + 1 > capacity_) {
|
||||
realloc(MAX_INT128_DIGITS + 1);
|
||||
}
|
||||
|
||||
// Output is capped at MAX_INT128_DIGITS characters (the macro's length
// includes the sign).
const auto result = fmt::format_to_n(head_ + size_, MAX_INT128_DIGITS, "{}", l);
|
||||
// NOTE(review): result.size is presumably the untruncated length; it is
// expected to stay within the cap for any int128 — verify against fmt docs.
assert(result.size > 0);
|
||||
size_ += result.size;
|
||||
}
|
||||
|
||||
// write the double to string
|
||||
|
||||
@ -224,9 +224,8 @@ private:
|
||||
|
||||
if (value->isInt() && curr->isInt()) {
|
||||
// Both are ints and optimization can be done here
|
||||
int64_t val = ((const JsonbIntVal*)value)->val();
|
||||
// setVal may fail because the new value can't fit into the current one.
|
||||
if (((JsonbIntVal*)curr)->setVal(val)) {
|
||||
if (((JsonbIntVal*)curr)->setVal(((const JsonbIntVal*)value)->val())) {
|
||||
return JsonbErrType::E_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -40,8 +40,11 @@ public:
|
||||
|
||||
// get json string
|
||||
const std::string to_json_string(const char* data, size_t size) {
|
||||
doris::JsonbValue* pval = doris::JsonbDocument::createDocument(data, size)->getValue();
|
||||
return to_json_string(pval);
|
||||
JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size);
|
||||
if (!pdoc) {
|
||||
LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size);
|
||||
}
|
||||
return to_json_string(pdoc->getValue());
|
||||
}
|
||||
|
||||
const std::string to_json_string(const JsonbValue* val) {
|
||||
|
||||
@ -45,6 +45,8 @@
|
||||
|
||||
namespace doris {
|
||||
|
||||
using int128_t = __int128;
|
||||
|
||||
template <class OS_TYPE>
|
||||
class JsonbWriterT {
|
||||
public:
|
||||
@ -234,10 +236,11 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t writeInt128(__int128_t v) {
|
||||
uint32_t writeInt128(int128_t v) {
|
||||
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
|
||||
if (!writeFirstHeader()) return 0;
|
||||
os_->put((JsonbTypeUnder)JsonbType::T_Int128);
|
||||
os_->write((char*)&v, sizeof(__int128_t));
|
||||
os_->write((char*)&v, sizeof(int128_t));
|
||||
kvState_ = WS_Value;
|
||||
return sizeof(JsonbInt128Val);
|
||||
}
|
||||
|
||||
@ -616,6 +616,8 @@ struct ConvertImplNumberToJsonb {
|
||||
writer.writeInt32(data[i]);
|
||||
} else if constexpr (std::is_same_v<ColumnInt64, ColumnType>) {
|
||||
writer.writeInt64(data[i]);
|
||||
} else if constexpr (std::is_same_v<ColumnInt128, ColumnType>) {
|
||||
writer.writeInt128(data[i]);
|
||||
} else if constexpr (std::is_same_v<ColumnFloat64, ColumnType>) {
|
||||
writer.writeDouble(data[i]);
|
||||
} else {
|
||||
@ -721,7 +723,7 @@ struct ConvertImplFromJsonb {
|
||||
}
|
||||
} else if constexpr (type_index == TypeIndex::Int8) {
|
||||
if (value->isInt8()) {
|
||||
res[i] = ((const JsonbIntVal*)value)->val();
|
||||
res[i] = (int8_t)((const JsonbIntVal*)value)->val();
|
||||
} else {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
@ -743,7 +745,15 @@ struct ConvertImplFromJsonb {
|
||||
} else if constexpr (type_index == TypeIndex::Int64) {
|
||||
if (value->isInt8() || value->isInt16() || value->isInt32() ||
|
||||
value->isInt64()) {
|
||||
res[i] = ((const JsonbIntVal*)value)->val();
|
||||
res[i] = (int64_t)((const JsonbIntVal*)value)->val();
|
||||
} else {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
}
|
||||
} else if constexpr (type_index == TypeIndex::Int128) {
|
||||
if (value->isInt8() || value->isInt16() || value->isInt32() ||
|
||||
value->isInt64() || value->isInt128()) {
|
||||
res[i] = (int128_t)((const JsonbIntVal*)value)->val();
|
||||
} else {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
@ -1793,6 +1803,8 @@ private:
|
||||
return &ConvertImplFromJsonb<TypeIndex::Int32, ColumnInt32>::execute;
|
||||
case TypeIndex::Int64:
|
||||
return &ConvertImplFromJsonb<TypeIndex::Int64, ColumnInt64>::execute;
|
||||
case TypeIndex::Int128:
|
||||
return &ConvertImplFromJsonb<TypeIndex::Int128, ColumnInt128>::execute;
|
||||
case TypeIndex::Float64:
|
||||
return &ConvertImplFromJsonb<TypeIndex::Float64, ColumnFloat64>::execute;
|
||||
default:
|
||||
@ -1817,6 +1829,8 @@ private:
|
||||
return &ConvertImplNumberToJsonb<ColumnInt32>::execute;
|
||||
case TypeIndex::Int64:
|
||||
return &ConvertImplNumberToJsonb<ColumnInt64>::execute;
|
||||
case TypeIndex::Int128:
|
||||
return &ConvertImplNumberToJsonb<ColumnInt128>::execute;
|
||||
case TypeIndex::Float64:
|
||||
return &ConvertImplNumberToJsonb<ColumnFloat64>::execute;
|
||||
case TypeIndex::String:
|
||||
|
||||
@ -756,7 +756,15 @@ private:
|
||||
}
|
||||
} else if constexpr (std::is_same_v<int64_t, typename ValueType::T>) {
|
||||
if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64()) {
|
||||
res[i] = ((const JsonbIntVal*)value)->val();
|
||||
res[i] = (int64_t)((const JsonbIntVal*)value)->val();
|
||||
} else {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
}
|
||||
} else if constexpr (std::is_same_v<int128_t, typename ValueType::T>) {
|
||||
if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64() ||
|
||||
value->isInt128()) {
|
||||
res[i] = (int128_t)((const JsonbIntVal*)value)->val();
|
||||
} else {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
@ -892,6 +900,13 @@ struct JsonbTypeInt64 {
|
||||
static const bool only_check_exists = false;
|
||||
};
|
||||
|
||||
// Type bundle for 128-bit integer extraction; used as the template argument
// of JsonbExtractImpl by JsonbExtractLargeInt.
struct JsonbTypeInt128 {
|
||||
// Element type of the extracted value.
using T = int128_t;
|
||||
// Data type reported for the result.
using ReturnType = DataTypeInt128;
|
||||
// Column type that holds the results.
using ColumnType = ColumnVector<T>;
|
||||
// presumably separates value extraction from existence-only checks —
// confirm against JsonbExtractImpl.
static const bool only_check_exists = false;
|
||||
};
|
||||
|
||||
struct JsonbTypeDouble {
|
||||
using T = double;
|
||||
using ReturnType = DataTypeFloat64;
|
||||
@ -948,6 +963,11 @@ struct JsonbExtractBigInt : public JsonbExtractImpl<JsonbTypeInt64> {
|
||||
static constexpr auto alias = "jsonb_extract_bigint";
|
||||
};
|
||||
|
||||
// Extract implementation for 128-bit integers: derives from
// JsonbExtractImpl<JsonbTypeInt128> and supplies the function's two names.
struct JsonbExtractLargeInt : public JsonbExtractImpl<JsonbTypeInt128> {
|
||||
// Primary function name.
static constexpr auto name = "json_extract_largeint";
|
||||
// Alternate name; register_function_jsonb registers it as an alias of `name`.
static constexpr auto alias = "jsonb_extract_largeint";
|
||||
};
|
||||
|
||||
struct JsonbExtractDouble : public JsonbExtractImpl<JsonbTypeDouble> {
|
||||
static constexpr auto name = "json_extract_double";
|
||||
static constexpr auto alias = "jsonb_extract_double";
|
||||
@ -975,6 +995,7 @@ using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>;
|
||||
using FunctionJsonbExtractBool = FunctionJsonbExtract<JsonbExtractBool>;
|
||||
using FunctionJsonbExtractInt = FunctionJsonbExtract<JsonbExtractInt>;
|
||||
using FunctionJsonbExtractBigInt = FunctionJsonbExtract<JsonbExtractBigInt>;
|
||||
using FunctionJsonbExtractLargeInt = FunctionJsonbExtract<JsonbExtractLargeInt>;
|
||||
using FunctionJsonbExtractDouble = FunctionJsonbExtract<JsonbExtractDouble>;
|
||||
using FunctionJsonbExtractString = FunctionJsonbExtract<JsonbExtractString>;
|
||||
using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>;
|
||||
@ -1027,6 +1048,8 @@ void register_function_jsonb(SimpleFunctionFactory& factory) {
|
||||
factory.register_alias(FunctionJsonbExtractInt::name, FunctionJsonbExtractInt::alias);
|
||||
factory.register_function<FunctionJsonbExtractBigInt>();
|
||||
factory.register_alias(FunctionJsonbExtractBigInt::name, FunctionJsonbExtractBigInt::alias);
|
||||
factory.register_function<FunctionJsonbExtractLargeInt>();
|
||||
factory.register_alias(FunctionJsonbExtractLargeInt::name, FunctionJsonbExtractLargeInt::alias);
|
||||
factory.register_function<FunctionJsonbExtractDouble>();
|
||||
factory.register_alias(FunctionJsonbExtractDouble::name, FunctionJsonbExtractDouble::alias);
|
||||
factory.register_function<FunctionJsonbExtractString>();
|
||||
|
||||
Reference in New Issue
Block a user