[refactor] fix warings when compile with clang (#8069)

2022-02-19 11:29:02 +08:00
parent 8892780091
commit 50864aca7d
200 changed files with 1750 additions and 2617 deletions
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@ -198,8 +198,7 @@ static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, v

 template <typename T>
 static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type,
-                                 vectorized::IColumn* col_ptr, bool pure_doc_value,
-                                 bool nullable) {
+                                 vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
    if (col.IsNumber() && nullable) {
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
@ -229,8 +228,7 @@ static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type

 template <typename T>
 static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
-                               vectorized::IColumn* col_ptr, bool pure_doc_value,
-                               bool nullable) {
+                               vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
    if (col.IsNumber()) {
        T value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
@ -258,8 +256,7 @@ static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
    return Status::OK();
 }

-ScrollParser::ScrollParser(bool doc_value_mode)
-        : _scroll_id(""), _size(0), _line_index(0), _doc_value_mode(doc_value_mode) {}
+ScrollParser::ScrollParser(bool doc_value_mode) : _scroll_id(""), _size(0), _line_index(0) {}

 ScrollParser::~ScrollParser() {}

@ -503,7 +500,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple,
                        strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
            }

-            const rapidjson::Value& str_col = is_nested_str? col[0]: col;
+            const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
            const std::string& val = str_col.GetString();
            size_t val_size = str_col.GetStringLength();
            StringParser::ParseResult result;
@ -643,12 +640,110 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
            return Status::RuntimeError(details);
        }
        switch (type) {
-            case TYPE_CHAR:
-            case TYPE_VARCHAR:
-            case TYPE_STRING: {
-                // sometimes elasticsearch user post some not-string value to Elasticsearch Index.
-                // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
-                // this may be a tricky, but we can workaround this issue
+        case TYPE_CHAR:
+        case TYPE_VARCHAR:
+        case TYPE_STRING: {
+            // sometimes elasticsearch user post some not-string value to Elasticsearch Index.
+            // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
+            // this may be a tricky, but we can workaround this issue
+            std::string val;
+            if (pure_doc_value) {
+                if (!col[0].IsString()) {
+                    val = json_value_to_string(col[0]);
+                } else {
+                    val = col[0].GetString();
+                }
+            } else {
+                RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+                if (!col.IsString()) {
+                    val = json_value_to_string(col);
+                } else {
+                    val = col.GetString();
+                }
+            }
+            size_t val_size = val.length();
+            col_ptr->insert_data(const_cast<const char*>(val.data()), val_size);
+            break;
+        }
+
+        case TYPE_TINYINT: {
+            insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_SMALLINT: {
+            insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_INT: {
+            insert_int_value<int32>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_BIGINT: {
+            insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_LARGEINT: {
+            insert_int_value<__int128>(col, type, col_ptr, pure_doc_value,
+                                       slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_DOUBLE: {
+            insert_float_value<double>(col, type, col_ptr, pure_doc_value,
+                                       slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_FLOAT: {
+            insert_float_value<float>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
+            break;
+        }
+
+        case TYPE_BOOLEAN: {
+            if (col.IsBool()) {
+                int8_t val = col.GetBool();
+                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+                break;
+            }
+
+            if (col.IsNumber()) {
+                int8_t val = col.GetInt();
+                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+                break;
+            }
+
+            bool is_nested_str = false;
+            if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
+                int8_t val = col[0].GetBool();
+                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+                break;
+            } else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
+                is_nested_str = true;
+            } else if (pure_doc_value && col.IsArray()) {
+                return Status::InternalError(
+                        strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
+            }
+
+            const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
+
+            const std::string& val = str_col.GetString();
+            size_t val_size = str_col.GetStringLength();
+            StringParser::ParseResult result;
+            bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
+            RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
+            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            DecimalV2Value data;
+
+            if (col.IsDouble()) {
+                data.assign_from_double(col.GetDouble());
+            } else {
                std::string val;
                if (pure_doc_value) {
                    if (!col[0].IsString()) {
@ -664,142 +759,44 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
                        val = col.GetString();
                    }
                }
-                size_t val_size = val.length();
-                col_ptr->insert_data(
-                        const_cast<const char*>(val.data()), val_size);
-                break;
+                data.parse_from_str(val.data(), val.length());
            }
+            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
+            break;
+        }

-            case TYPE_TINYINT: {
-                insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_SMALLINT: {
-                insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_INT: {
-                insert_int_value<int32>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_BIGINT: {
-                insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_LARGEINT: {
-                insert_int_value<__int128>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_DOUBLE: {
-                insert_float_value<double>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_FLOAT: {
-                insert_float_value<float>(col, type, col_ptr, pure_doc_value, slot_desc->is_nullable());
-                break;
-            }
-
-            case TYPE_BOOLEAN: {
-                if (col.IsBool()) {
-                    int8_t val = col.GetBool();
-                    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+        case TYPE_DATE:
+        case TYPE_DATETIME: {
+            // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
+            if (col.IsNumber()) {
+                // ES process date/datetime field would use millisecond timestamp for index or docvalue
+                // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
+                // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
+                RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
+            } else if (col.IsArray() && pure_doc_value) {
+                // this would happened just only when `enable_docvalue_scan = true`
+                // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
+                // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
+                // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
+                // date field's docvalue
+                if (col[0].IsString()) {
+                    RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
                    break;
                }
-
-                if (col.IsNumber()) {
-                    int8_t val = col.GetInt();
-                    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
-                    break;
-                }
-
-                bool is_nested_str = false;
-                if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
-                    int8_t val = col[0].GetBool();
-                    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
-                    break;
-                } else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
-                    is_nested_str = true;
-                } else if (pure_doc_value && col.IsArray()) {
-                    return Status::InternalError(
-                            strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
-                }
-
-                const rapidjson::Value& str_col = is_nested_str? col[0]: col;
-
-                const std::string& val = str_col.GetString();
-                size_t val_size = str_col.GetStringLength();
-                StringParser::ParseResult result;
-                bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
-                RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
-                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
-                break;
-            }
-            case TYPE_DECIMALV2: {
-                DecimalV2Value data;
-
-                if (col.IsDouble()) {
-                    data.assign_from_double(col.GetDouble());
-                } else {
-                    std::string val;
-                    if (pure_doc_value) {
-                        if (!col[0].IsString()) {
-                            val = json_value_to_string(col[0]);
-                        } else {
-                            val = col[0].GetString();
-                        }
-                    } else {
-                        RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
-                        if (!col.IsString()) {
-                            val = json_value_to_string(col);
-                        } else {
-                            val = col.GetString();
-                        }
-                    }
-                    data.parse_from_str(val.data(), val.length());
-                }
-                col_ptr->insert_data(
-                        const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
-                break;
-            }
-
-            case TYPE_DATE:
-            case TYPE_DATETIME: {
-                // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
-                if (col.IsNumber()) {
-                    // ES process date/datetime field would use millisecond timestamp for index or docvalue
-                    // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
-                    // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
-                    RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
-                } else if (col.IsArray() && pure_doc_value) {
-                    // this would happened just only when `enable_docvalue_scan = true`
-                    // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
-                    // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
-                    // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
-                    // date field's docvalue
-                    if (col[0].IsString()) {
-                        RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
-                        break;
-                    }
-                    // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
-                    RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
-                } else {
-                    // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
-                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
-                    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
-                    RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
-                }
-                break;
-            }
-            default: {
-                DCHECK(false);
-                break;
+                // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
+                RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
+            } else {
+                // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
+                RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+                RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
+                RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
            }
+            break;
+        }
+        default: {
+            DCHECK(false);
+            break;
+        }
        }
    }

@ -875,5 +872,4 @@ Status ScrollParser::fill_date_col_with_timestamp(vectorized::IColumn* col_ptr,
    return Status::OK();
 }

-
 } // namespace doris