[fix](json) fix json int128 overflow (#22917)

* support int128 in jsonb

* fix jsonb int128 write

* fix jsonb to json int128

* fix json functions for int128

* add nereids function jsonb_extract_largeint

* add testcase for json int128

* change docs for json int128

* add nereids function jsonb_extract_largeint

* clang format

* fix check style

* using int128_t = __int128_t for all int128

* use fmt::format_to instead of snprintf digit by digit for int128

* clang format

* delete useless check

* add warn log

* clang format
This commit is contained in:
Kang
2023-08-25 11:40:30 +08:00
committed by GitHub
parent 372f83df5c
commit 8ef6b4d996
20 changed files with 2972 additions and 36 deletions

View File

@ -17,6 +17,8 @@
#include "runtime/jsonb_value.h"
#include <fmt/format.h>
#include <string_view>
#include "util/jsonb_error.h"
@ -30,8 +32,10 @@ Status JsonBinaryValue::from_json_string(const char* s, int length) {
JsonbErrType error = JsonbErrType::E_NONE;
if (!parser.parse(s, length)) {
error = parser.getErrorCode();
return Status::InvalidArgument("json parse error: {} for value: {}",
JsonbErrMsg::getErrMsg(error), std::string_view(s, length));
auto msg = fmt::format("json parse error: {} for value: {}", JsonbErrMsg::getErrMsg(error),
std::string_view(s, length));
LOG(WARNING) << msg;
return Status::InvalidArgument(msg);
}
ptr = parser.getWriter().getOutput()->getBuffer();

View File

@ -91,6 +91,8 @@ namespace doris {
#define JSONB_VER 1
using int128_t = __int128;
// forward declaration
class JsonbValue;
class ObjectVal;
@ -517,6 +519,8 @@ public:
return "int";
case JsonbType::T_Int64:
return "bigint";
case JsonbType::T_Int128:
return "largeint";
case JsonbType::T_Double:
return "double";
case JsonbType::T_Float:
@ -624,11 +628,11 @@ inline bool JsonbInt64Val::setVal(int64_t value) {
return true;
}
typedef NumberValT<__int128_t> JsonbInt128Val;
typedef NumberValT<int128_t> JsonbInt128Val;
// override setVal for Int64Val
// override setVal for Int128Val
template <>
inline bool JsonbInt128Val::setVal(__int128_t value) {
inline bool JsonbInt128Val::setVal(int128_t value) {
if (!isInt128()) {
return false;
}
@ -666,7 +670,7 @@ inline bool JsonbFloatVal::setVal(float value) {
// A class to get an integer
class JsonbIntVal : public JsonbValue {
public:
int64_t val() const {
int128_t val() const {
switch (type_) {
case JsonbType::T_Int8:
return ((JsonbInt8Val*)this)->val();
@ -676,11 +680,13 @@ public:
return ((JsonbInt32Val*)this)->val();
case JsonbType::T_Int64:
return ((JsonbInt64Val*)this)->val();
case JsonbType::T_Int128:
return ((JsonbInt128Val*)this)->val();
default:
return 0;
}
}
bool setVal(int64_t val) {
bool setVal(int128_t val) {
switch (type_) {
case JsonbType::T_Int8:
if (val < std::numeric_limits<int8_t>::min() ||
@ -698,7 +704,9 @@ public:
return false;
return ((JsonbInt32Val*)this)->setVal((int32_t)val);
case JsonbType::T_Int64:
return ((JsonbInt64Val*)this)->setVal(val);
return ((JsonbInt64Val*)this)->setVal((int64_t)val);
case JsonbType::T_Int128:
return ((JsonbInt128Val*)this)->setVal(val);
default:
return false;
}
@ -1131,7 +1139,7 @@ inline unsigned int JsonbValue::numPackedBytes() const {
return sizeof(type_) + sizeof(float);
}
case JsonbType::T_Int128: {
return sizeof(type_) + sizeof(__int128_t);
return sizeof(type_) + sizeof(int128_t);
}
case JsonbType::T_String:
case JsonbType::T_Binary: {
@ -1168,7 +1176,7 @@ inline unsigned int JsonbValue::size() const {
return sizeof(float);
}
case JsonbType::T_Int128: {
return sizeof(__int128_t);
return sizeof(int128_t);
}
case JsonbType::T_String:
case JsonbType::T_Binary: {

View File

@ -71,6 +71,8 @@
namespace doris {
using int128_t = __int128;
/*
* Template JsonbParserTSIMD
*/
@ -296,21 +298,22 @@ public:
return;
}
} else if (num.is_int64() || num.is_uint64()) {
if (num.is_uint64() && num.get_uint64() > std::numeric_limits<int64_t>::max()) {
err_ = JsonbErrType::E_OCTAL_OVERFLOW;
LOG(WARNING) << "overflow number: " << num.get_uint64();
return;
}
int64_t val = num.is_int64() ? num.get_int64() : num.get_uint64();
int128_t val = num.is_int64() ? (int128_t)num.get_int64() : (int128_t)num.get_uint64();
int size = 0;
if (val <= std::numeric_limits<int8_t>::max()) {
if (val >= std::numeric_limits<int8_t>::min() &&
val <= std::numeric_limits<int8_t>::max()) {
size = writer_.writeInt8((int8_t)val);
} else if (val <= std::numeric_limits<int16_t>::max()) {
} else if (val >= std::numeric_limits<int16_t>::min() &&
val <= std::numeric_limits<int16_t>::max()) {
size = writer_.writeInt16((int16_t)val);
} else if (val <= std::numeric_limits<int32_t>::max()) {
} else if (val >= std::numeric_limits<int32_t>::min() &&
val <= std::numeric_limits<int32_t>::max()) {
size = writer_.writeInt32((int32_t)val);
} else { // val <= INT64_MAX
size = writer_.writeInt64(val);
} else if (val >= std::numeric_limits<int64_t>::min() &&
val <= std::numeric_limits<int64_t>::max()) {
size = writer_.writeInt64((int64_t)val);
} else { // INT128
size = writer_.writeInt128(val);
}
if (size == 0) {

View File

@ -34,16 +34,21 @@
#endif
#include <assert.h>
#include <fmt/format.h>
#include <string.h>
#include <algorithm>
#include <cinttypes>
#include <iostream>
namespace doris {
using int128_t = __int128;
// lengths include the sign character
#define MAX_INT_DIGITS 11
#define MAX_INT64_DIGITS 20
#define MAX_INT128_DIGITS 40
#define MAX_DOUBLE_DIGITS 23 // 1(sign)+16(significant)+1(decimal)+5(exponent)
/*
@ -126,9 +131,15 @@ public:
size_ += len;
}
void write(__int128 l) {
// TODO
assert(false);
// Append the decimal text of a 128-bit integer to the output buffer.
//
// The widest value needs MAX_INT128_DIGITS characters (sign included).
// fmt::format_to_n does not write a terminating NUL, so the one extra byte
// requested below is spare headroom, not something the formatter relies on.
void write(int128_t l) {
    constexpr auto kNeeded = MAX_INT128_DIGITS + 1;
    if (capacity_ < size_ + kNeeded) {
        realloc(kNeeded);
    }
    // `size` is the untruncated output length; with the cap above it is
    // also the number of characters actually written.
    const auto written = fmt::format_to_n(head_ + size_, MAX_INT128_DIGITS, "{}", l).size;
    assert(written > 0);
    size_ += written;
}
// write the double to string

View File

@ -224,9 +224,8 @@ private:
if (value->isInt() && curr->isInt()) {
// Both are ints and optimization can be done here
int64_t val = ((const JsonbIntVal*)value)->val();
// setVal may fail because the new value can't fit into the current one.
if (((JsonbIntVal*)curr)->setVal(val)) {
if (((JsonbIntVal*)curr)->setVal(((const JsonbIntVal*)value)->val())) {
return JsonbErrType::E_NONE;
}
}

View File

@ -40,8 +40,11 @@ public:
// get json string
const std::string to_json_string(const char* data, size_t size) {
doris::JsonbValue* pval = doris::JsonbDocument::createDocument(data, size)->getValue();
return to_json_string(pval);
JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size);
if (!pdoc) {
LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size);
}
return to_json_string(pdoc->getValue());
}
const std::string to_json_string(const JsonbValue* val) {

View File

@ -45,6 +45,8 @@
namespace doris {
using int128_t = __int128;
template <class OS_TYPE>
class JsonbWriterT {
public:
@ -234,10 +236,11 @@ public:
return 0;
}
uint32_t writeInt128(__int128_t v) {
uint32_t writeInt128(int128_t v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) return 0;
os_->put((JsonbTypeUnder)JsonbType::T_Int128);
os_->write((char*)&v, sizeof(__int128_t));
os_->write((char*)&v, sizeof(int128_t));
kvState_ = WS_Value;
return sizeof(JsonbInt128Val);
}

View File

@ -616,6 +616,8 @@ struct ConvertImplNumberToJsonb {
writer.writeInt32(data[i]);
} else if constexpr (std::is_same_v<ColumnInt64, ColumnType>) {
writer.writeInt64(data[i]);
} else if constexpr (std::is_same_v<ColumnInt128, ColumnType>) {
writer.writeInt128(data[i]);
} else if constexpr (std::is_same_v<ColumnFloat64, ColumnType>) {
writer.writeDouble(data[i]);
} else {
@ -721,7 +723,7 @@ struct ConvertImplFromJsonb {
}
} else if constexpr (type_index == TypeIndex::Int8) {
if (value->isInt8()) {
res[i] = ((const JsonbIntVal*)value)->val();
res[i] = (int8_t)((const JsonbIntVal*)value)->val();
} else {
null_map[i] = 1;
res[i] = 0;
@ -743,7 +745,15 @@ struct ConvertImplFromJsonb {
} else if constexpr (type_index == TypeIndex::Int64) {
if (value->isInt8() || value->isInt16() || value->isInt32() ||
value->isInt64()) {
res[i] = ((const JsonbIntVal*)value)->val();
res[i] = (int64_t)((const JsonbIntVal*)value)->val();
} else {
null_map[i] = 1;
res[i] = 0;
}
} else if constexpr (type_index == TypeIndex::Int128) {
if (value->isInt8() || value->isInt16() || value->isInt32() ||
value->isInt64() || value->isInt128()) {
res[i] = (int128_t)((const JsonbIntVal*)value)->val();
} else {
null_map[i] = 1;
res[i] = 0;
@ -1793,6 +1803,8 @@ private:
return &ConvertImplFromJsonb<TypeIndex::Int32, ColumnInt32>::execute;
case TypeIndex::Int64:
return &ConvertImplFromJsonb<TypeIndex::Int64, ColumnInt64>::execute;
case TypeIndex::Int128:
return &ConvertImplFromJsonb<TypeIndex::Int128, ColumnInt128>::execute;
case TypeIndex::Float64:
return &ConvertImplFromJsonb<TypeIndex::Float64, ColumnFloat64>::execute;
default:
@ -1817,6 +1829,8 @@ private:
return &ConvertImplNumberToJsonb<ColumnInt32>::execute;
case TypeIndex::Int64:
return &ConvertImplNumberToJsonb<ColumnInt64>::execute;
case TypeIndex::Int128:
return &ConvertImplNumberToJsonb<ColumnInt128>::execute;
case TypeIndex::Float64:
return &ConvertImplNumberToJsonb<ColumnFloat64>::execute;
case TypeIndex::String:

View File

@ -756,7 +756,15 @@ private:
}
} else if constexpr (std::is_same_v<int64_t, typename ValueType::T>) {
if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64()) {
res[i] = ((const JsonbIntVal*)value)->val();
res[i] = (int64_t)((const JsonbIntVal*)value)->val();
} else {
null_map[i] = 1;
res[i] = 0;
}
} else if constexpr (std::is_same_v<int128_t, typename ValueType::T>) {
if (value->isInt8() || value->isInt16() || value->isInt32() || value->isInt64() ||
value->isInt128()) {
res[i] = (int128_t)((const JsonbIntVal*)value)->val();
} else {
null_map[i] = 1;
res[i] = 0;
@ -892,6 +900,13 @@ struct JsonbTypeInt64 {
static const bool only_check_exists = false;
};
// Type descriptor for 128-bit integer (largeint) extraction. It is supplied
// as the template argument of JsonbExtractImpl by JsonbExtractLargeInt.
struct JsonbTypeInt128 {
// Element type of the extracted value.
using T = int128_t;
// Result data type paired with T.
using ReturnType = DataTypeInt128;
// Column class instantiated for T.
using ColumnType = ColumnVector<T>;
// NOTE(review): presumably false means the value itself is extracted rather
// than only testing that the path exists -- confirm against JsonbExtractImpl.
static const bool only_check_exists = false;
};
struct JsonbTypeDouble {
using T = double;
using ReturnType = DataTypeFloat64;
@ -948,6 +963,11 @@ struct JsonbExtractBigInt : public JsonbExtractImpl<JsonbTypeInt64> {
static constexpr auto alias = "jsonb_extract_bigint";
};
// Extract implementation for 128-bit integers, built on
// JsonbExtractImpl<JsonbTypeInt128>. Carries the primary function name and
// its jsonb_-prefixed alias.
// NOTE(review): register_function_jsonb passes
// FunctionJsonbExtractLargeInt::name / ::alias to factory.register_alias;
// presumably those forward to the constants below -- confirm in
// FunctionJsonbExtract.
struct JsonbExtractLargeInt : public JsonbExtractImpl<JsonbTypeInt128> {
static constexpr auto name = "json_extract_largeint";
static constexpr auto alias = "jsonb_extract_largeint";
};
struct JsonbExtractDouble : public JsonbExtractImpl<JsonbTypeDouble> {
static constexpr auto name = "json_extract_double";
static constexpr auto alias = "jsonb_extract_double";
@ -975,6 +995,7 @@ using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>;
using FunctionJsonbExtractBool = FunctionJsonbExtract<JsonbExtractBool>;
using FunctionJsonbExtractInt = FunctionJsonbExtract<JsonbExtractInt>;
using FunctionJsonbExtractBigInt = FunctionJsonbExtract<JsonbExtractBigInt>;
using FunctionJsonbExtractLargeInt = FunctionJsonbExtract<JsonbExtractLargeInt>;
using FunctionJsonbExtractDouble = FunctionJsonbExtract<JsonbExtractDouble>;
using FunctionJsonbExtractString = FunctionJsonbExtract<JsonbExtractString>;
using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>;
@ -1027,6 +1048,8 @@ void register_function_jsonb(SimpleFunctionFactory& factory) {
factory.register_alias(FunctionJsonbExtractInt::name, FunctionJsonbExtractInt::alias);
factory.register_function<FunctionJsonbExtractBigInt>();
factory.register_alias(FunctionJsonbExtractBigInt::name, FunctionJsonbExtractBigInt::alias);
factory.register_function<FunctionJsonbExtractLargeInt>();
factory.register_alias(FunctionJsonbExtractLargeInt::name, FunctionJsonbExtractLargeInt::alias);
factory.register_function<FunctionJsonbExtractDouble>();
factory.register_alias(FunctionJsonbExtractDouble::name, FunctionJsonbExtractDouble::alias);
factory.register_function<FunctionJsonbExtractString>();