[refactor] fix warings when compile with clang (#8069)
This commit is contained in:
@ -198,8 +198,7 @@ static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, v
|
||||
|
||||
template <typename T>
|
||||
static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type,
|
||||
vectorized::IColumn* col_ptr, bool pure_doc_value,
|
||||
bool nullable) {
|
||||
vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
|
||||
static_assert(sizeof(T) == 4 || sizeof(T) == 8);
|
||||
if (col.IsNumber() && nullable) {
|
||||
T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
|
||||
@ -229,8 +228,7 @@ static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type
|
||||
|
||||
template <typename T>
|
||||
static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
|
||||
vectorized::IColumn* col_ptr, bool pure_doc_value,
|
||||
bool nullable) {
|
||||
vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
|
||||
if (col.IsNumber()) {
|
||||
T value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
|
||||
@ -258,8 +256,7 @@ static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
ScrollParser::ScrollParser(bool doc_value_mode)
|
||||
: _scroll_id(""), _size(0), _line_index(0), _doc_value_mode(doc_value_mode) {}
|
||||
ScrollParser::ScrollParser(bool doc_value_mode) : _scroll_id(""), _size(0), _line_index(0) {}
|
||||
|
||||
ScrollParser::~ScrollParser() {}
|
||||
|
||||
@ -503,7 +500,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple,
|
||||
strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
|
||||
}
|
||||
|
||||
const rapidjson::Value& str_col = is_nested_str? col[0]: col;
|
||||
const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
|
||||
const std::string& val = str_col.GetString();
|
||||
size_t val_size = str_col.GetStringLength();
|
||||
StringParser::ParseResult result;
|
||||
@ -643,12 +640,110 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
|
||||
return Status::RuntimeError(details);
|
||||
}
|
||||
switch (type) {
|
||||
case TYPE_CHAR:
|
||||
case TYPE_VARCHAR:
|
||||
case TYPE_STRING: {
|
||||
// sometimes elasticsearch user post some not-string value to Elasticsearch Index.
|
||||
// because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
|
||||
// this may be a tricky, but we can workaround this issue
|
||||
case TYPE_CHAR:
|
||||
case TYPE_VARCHAR:
|
||||
case TYPE_STRING: {
|
||||
// sometimes elasticsearch user post some not-string value to Elasticsearch Index.
|
||||
// because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
|
||||
// this may be a tricky, but we can workaround this issue
|
||||
std::string val;
|
||||
if (pure_doc_value) {
|
||||
if (!col[0].IsString()) {
|
||||
val = json_value_to_string(col[0]);
|
||||
} else {
|
||||
val = col[0].GetString();
|
||||
}
|
||||
} else {
|
||||
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
|
||||
if (!col.IsString()) {
|
||||
val = json_value_to_string(col);
|
||||
} else {
|
||||
val = col.GetString();
|
||||
}
|
||||
}
|
||||
size_t val_size = val.length();
|
||||
col_ptr->insert_data(const_cast<const char*>(val.data()), val_size);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_TINYINT: {
|
||||
insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_SMALLINT: {
|
||||
insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_INT: {
|
||||
insert_int_value<int32>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BIGINT: {
|
||||
insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_LARGEINT: {
|
||||
insert_int_value<__int128>(col, type, col_ptr, pure_doc_value,
|
||||
slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DOUBLE: {
|
||||
insert_float_value<double>(col, type, col_ptr, pure_doc_value,
|
||||
slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_FLOAT: {
|
||||
insert_float_value<float>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BOOLEAN: {
|
||||
if (col.IsBool()) {
|
||||
int8_t val = col.GetBool();
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (col.IsNumber()) {
|
||||
int8_t val = col.GetInt();
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
break;
|
||||
}
|
||||
|
||||
bool is_nested_str = false;
|
||||
if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
|
||||
int8_t val = col[0].GetBool();
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
break;
|
||||
} else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
|
||||
is_nested_str = true;
|
||||
} else if (pure_doc_value && col.IsArray()) {
|
||||
return Status::InternalError(
|
||||
strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
|
||||
}
|
||||
|
||||
const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
|
||||
|
||||
const std::string& val = str_col.GetString();
|
||||
size_t val_size = str_col.GetStringLength();
|
||||
StringParser::ParseResult result;
|
||||
bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
|
||||
RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
|
||||
break;
|
||||
}
|
||||
case TYPE_DECIMALV2: {
|
||||
DecimalV2Value data;
|
||||
|
||||
if (col.IsDouble()) {
|
||||
data.assign_from_double(col.GetDouble());
|
||||
} else {
|
||||
std::string val;
|
||||
if (pure_doc_value) {
|
||||
if (!col[0].IsString()) {
|
||||
@ -664,142 +759,44 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
|
||||
val = col.GetString();
|
||||
}
|
||||
}
|
||||
size_t val_size = val.length();
|
||||
col_ptr->insert_data(
|
||||
const_cast<const char*>(val.data()), val_size);
|
||||
break;
|
||||
data.parse_from_str(val.data(), val.length());
|
||||
}
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_TINYINT: {
|
||||
insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_SMALLINT: {
|
||||
insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_INT: {
|
||||
insert_int_value<int32>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BIGINT: {
|
||||
insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_LARGEINT: {
|
||||
insert_int_value<__int128>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DOUBLE: {
|
||||
insert_float_value<double>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_FLOAT: {
|
||||
insert_float_value<float>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BOOLEAN: {
|
||||
if (col.IsBool()) {
|
||||
int8_t val = col.GetBool();
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
case TYPE_DATE:
|
||||
case TYPE_DATETIME: {
|
||||
// this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
|
||||
if (col.IsNumber()) {
|
||||
// ES process date/datetime field would use millisecond timestamp for index or docvalue
|
||||
// processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
|
||||
// Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
|
||||
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
|
||||
} else if (col.IsArray() && pure_doc_value) {
|
||||
// this would happened just only when `enable_docvalue_scan = true`
|
||||
// ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
|
||||
// a standard date-format for date field as `2020-06-16T00:00:00.000Z`
|
||||
// At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
|
||||
// date field's docvalue
|
||||
if (col[0].IsString()) {
|
||||
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
|
||||
break;
|
||||
}
|
||||
|
||||
if (col.IsNumber()) {
|
||||
int8_t val = col.GetInt();
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
break;
|
||||
}
|
||||
|
||||
bool is_nested_str = false;
|
||||
if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
|
||||
int8_t val = col[0].GetBool();
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
|
||||
break;
|
||||
} else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
|
||||
is_nested_str = true;
|
||||
} else if (pure_doc_value && col.IsArray()) {
|
||||
return Status::InternalError(
|
||||
strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
|
||||
}
|
||||
|
||||
const rapidjson::Value& str_col = is_nested_str? col[0]: col;
|
||||
|
||||
const std::string& val = str_col.GetString();
|
||||
size_t val_size = str_col.GetStringLength();
|
||||
StringParser::ParseResult result;
|
||||
bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
|
||||
RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
|
||||
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
|
||||
break;
|
||||
}
|
||||
case TYPE_DECIMALV2: {
|
||||
DecimalV2Value data;
|
||||
|
||||
if (col.IsDouble()) {
|
||||
data.assign_from_double(col.GetDouble());
|
||||
} else {
|
||||
std::string val;
|
||||
if (pure_doc_value) {
|
||||
if (!col[0].IsString()) {
|
||||
val = json_value_to_string(col[0]);
|
||||
} else {
|
||||
val = col[0].GetString();
|
||||
}
|
||||
} else {
|
||||
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
|
||||
if (!col.IsString()) {
|
||||
val = json_value_to_string(col);
|
||||
} else {
|
||||
val = col.GetString();
|
||||
}
|
||||
}
|
||||
data.parse_from_str(val.data(), val.length());
|
||||
}
|
||||
col_ptr->insert_data(
|
||||
const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DATE:
|
||||
case TYPE_DATETIME: {
|
||||
// this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
|
||||
if (col.IsNumber()) {
|
||||
// ES process date/datetime field would use millisecond timestamp for index or docvalue
|
||||
// processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
|
||||
// Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
|
||||
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
|
||||
} else if (col.IsArray() && pure_doc_value) {
|
||||
// this would happened just only when `enable_docvalue_scan = true`
|
||||
// ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
|
||||
// a standard date-format for date field as `2020-06-16T00:00:00.000Z`
|
||||
// At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
|
||||
// date field's docvalue
|
||||
if (col[0].IsString()) {
|
||||
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
|
||||
break;
|
||||
}
|
||||
// ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
|
||||
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
|
||||
} else {
|
||||
// this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
|
||||
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
|
||||
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
|
||||
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
DCHECK(false);
|
||||
break;
|
||||
// ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
|
||||
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
|
||||
} else {
|
||||
// this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
|
||||
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
|
||||
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
|
||||
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
DCHECK(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -875,5 +872,4 @@ Status ScrollParser::fill_date_col_with_timestamp(vectorized::IColumn* col_ptr,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
} // namespace doris
|
||||
|
||||
Reference in New Issue
Block a user