[refactor] fix warings when compile with clang (#8069)

This commit is contained in:
Zhengguo Yang
2022-02-19 11:29:02 +08:00
committed by GitHub
parent 8892780091
commit 50864aca7d
200 changed files with 1750 additions and 2617 deletions

View File

@ -198,8 +198,7 @@ static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, v
template <typename T>
static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type,
vectorized::IColumn* col_ptr, bool pure_doc_value,
bool nullable) {
vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
static_assert(sizeof(T) == 4 || sizeof(T) == 8);
if (col.IsNumber() && nullable) {
T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
@ -229,8 +228,7 @@ static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type
template <typename T>
static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
vectorized::IColumn* col_ptr, bool pure_doc_value,
bool nullable) {
vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
if (col.IsNumber()) {
T value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
@ -258,8 +256,7 @@ static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
return Status::OK();
}
ScrollParser::ScrollParser(bool doc_value_mode)
: _scroll_id(""), _size(0), _line_index(0), _doc_value_mode(doc_value_mode) {}
ScrollParser::ScrollParser(bool doc_value_mode) : _scroll_id(""), _size(0), _line_index(0) {}
ScrollParser::~ScrollParser() {}
@ -503,7 +500,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple,
strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
}
const rapidjson::Value& str_col = is_nested_str? col[0]: col;
const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
const std::string& val = str_col.GetString();
size_t val_size = str_col.GetStringLength();
StringParser::ParseResult result;
@ -643,12 +640,110 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
return Status::RuntimeError(details);
}
switch (type) {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
// sometimes elasticsearch user post some not-string value to Elasticsearch Index.
// because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
// this may be a tricky, but we can workaround this issue
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
// sometimes elasticsearch user post some not-string value to Elasticsearch Index.
// because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
// this may be a tricky, but we can workaround this issue
std::string val;
if (pure_doc_value) {
if (!col[0].IsString()) {
val = json_value_to_string(col[0]);
} else {
val = col[0].GetString();
}
} else {
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
if (!col.IsString()) {
val = json_value_to_string(col);
} else {
val = col.GetString();
}
}
size_t val_size = val.length();
col_ptr->insert_data(const_cast<const char*>(val.data()), val_size);
break;
}
case TYPE_TINYINT: {
insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_SMALLINT: {
insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_INT: {
insert_int_value<int32>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_BIGINT: {
insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_LARGEINT: {
insert_int_value<__int128>(col, type, col_ptr, pure_doc_value,
slot_desc->is_nullable());
break;
}
case TYPE_DOUBLE: {
insert_float_value<double>(col, type, col_ptr, pure_doc_value,
slot_desc->is_nullable());
break;
}
case TYPE_FLOAT: {
insert_float_value<float>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_BOOLEAN: {
if (col.IsBool()) {
int8_t val = col.GetBool();
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
break;
}
if (col.IsNumber()) {
int8_t val = col.GetInt();
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
break;
}
bool is_nested_str = false;
if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
int8_t val = col[0].GetBool();
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
break;
} else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
is_nested_str = true;
} else if (pure_doc_value && col.IsArray()) {
return Status::InternalError(
strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
}
const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
const std::string& val = str_col.GetString();
size_t val_size = str_col.GetStringLength();
StringParser::ParseResult result;
bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
break;
}
case TYPE_DECIMALV2: {
DecimalV2Value data;
if (col.IsDouble()) {
data.assign_from_double(col.GetDouble());
} else {
std::string val;
if (pure_doc_value) {
if (!col[0].IsString()) {
@ -664,142 +759,44 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
val = col.GetString();
}
}
size_t val_size = val.length();
col_ptr->insert_data(
const_cast<const char*>(val.data()), val_size);
break;
data.parse_from_str(val.data(), val.length());
}
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
break;
}
case TYPE_TINYINT: {
insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_SMALLINT: {
insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_INT: {
insert_int_value<int32>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_BIGINT: {
insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_LARGEINT: {
insert_int_value<__int128>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_DOUBLE: {
insert_float_value<double>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_FLOAT: {
insert_float_value<float>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
break;
}
case TYPE_BOOLEAN: {
if (col.IsBool()) {
int8_t val = col.GetBool();
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
case TYPE_DATE:
case TYPE_DATETIME: {
// this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
if (col.IsNumber()) {
// ES process date/datetime field would use millisecond timestamp for index or docvalue
// processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
// Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
} else if (col.IsArray() && pure_doc_value) {
// this would happened just only when `enable_docvalue_scan = true`
// ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
// a standard date-format for date field as `2020-06-16T00:00:00.000Z`
// At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
// date field's docvalue
if (col[0].IsString()) {
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
break;
}
if (col.IsNumber()) {
int8_t val = col.GetInt();
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
break;
}
bool is_nested_str = false;
if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
int8_t val = col[0].GetBool();
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
break;
} else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
is_nested_str = true;
} else if (pure_doc_value && col.IsArray()) {
return Status::InternalError(
strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
}
const rapidjson::Value& str_col = is_nested_str? col[0]: col;
const std::string& val = str_col.GetString();
size_t val_size = str_col.GetStringLength();
StringParser::ParseResult result;
bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
break;
}
case TYPE_DECIMALV2: {
DecimalV2Value data;
if (col.IsDouble()) {
data.assign_from_double(col.GetDouble());
} else {
std::string val;
if (pure_doc_value) {
if (!col[0].IsString()) {
val = json_value_to_string(col[0]);
} else {
val = col[0].GetString();
}
} else {
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
if (!col.IsString()) {
val = json_value_to_string(col);
} else {
val = col.GetString();
}
}
data.parse_from_str(val.data(), val.length());
}
col_ptr->insert_data(
const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
break;
}
case TYPE_DATE:
case TYPE_DATETIME: {
// this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
if (col.IsNumber()) {
// ES process date/datetime field would use millisecond timestamp for index or docvalue
// processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
// Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
} else if (col.IsArray() && pure_doc_value) {
// this would happened just only when `enable_docvalue_scan = true`
// ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
// a standard date-format for date field as `2020-06-16T00:00:00.000Z`
// At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
// date field's docvalue
if (col[0].IsString()) {
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
break;
}
// ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
} else {
// this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
}
break;
}
default: {
DCHECK(false);
break;
// ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
} else {
// this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
}
break;
}
default: {
DCHECK(false);
break;
}
}
}
@ -875,5 +872,4 @@ Status ScrollParser::fill_date_col_with_timestamp(vectorized::IColumn* col_ptr,
return Status::OK();
}
} // namespace doris