diff --git a/.asf.yaml b/.asf.yaml index 6948d7b991..87b309320d 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -35,7 +35,7 @@ github: # if strict is true, means "Require branches to be up to date before merging". strict: false contexts: - # - Clang Formatter + - Clang Formatter - Build Extensions - License Check - P0 regression (Doris P0 regression) diff --git a/.clang-format b/.clang-format index 3b8b5704b9..cc1893a5e1 100644 --- a/.clang-format +++ b/.clang-format @@ -14,3 +14,5 @@ ReflowComments: false SortUsingDeclarations: false SpacesBeforeTrailingComments: 1 SpaceBeforeCpp11BracedList: true +#ignore include sort cause adjustment order may leads to compilation failure +SortIncludes: false diff --git a/be/src/agent/agent_server.cpp b/be/src/agent/agent_server.cpp index 6e754f941a..962eff6868 100644 --- a/be/src/agent/agent_server.cpp +++ b/be/src/agent/agent_server.cpp @@ -52,16 +52,14 @@ AgentServer::AgentServer(ExecEnv* exec_env, const TMasterInfo& master_info) // to make code to be more readable. #ifndef BE_TEST -#define CREATE_AND_START_POOL(type, pool_name) \ - pool_name.reset( \ - new TaskWorkerPool(TaskWorkerPool::TaskWorkerType::type, _exec_env, master_info, \ - TaskWorkerPool::ThreadModel::MULTI_THREADS)); \ +#define CREATE_AND_START_POOL(type, pool_name) \ + pool_name.reset(new TaskWorkerPool(TaskWorkerPool::TaskWorkerType::type, _exec_env, \ + master_info, TaskWorkerPool::ThreadModel::MULTI_THREADS)); \ pool_name->start(); -#define CREATE_AND_START_THREAD(type, pool_name) \ - pool_name.reset( \ - new TaskWorkerPool(TaskWorkerPool::TaskWorkerType::type, _exec_env, master_info, \ - TaskWorkerPool::ThreadModel::SINGLE_THREAD)); \ +#define CREATE_AND_START_THREAD(type, pool_name) \ + pool_name.reset(new TaskWorkerPool(TaskWorkerPool::TaskWorkerType::type, _exec_env, \ + master_info, TaskWorkerPool::ThreadModel::SINGLE_THREAD)); \ pool_name->start(); #else #define CREATE_AND_START_POOL(type, pool_name) @@ -153,7 +151,8 @@ void AgentServer::submit_tasks(TAgentResult& agent_result, HANDLE_TYPE(TTaskType::UPDATE_TABLET_META_INFO, _update_tablet_meta_info_workers, update_tablet_meta_info_req); HANDLE_TYPE(TTaskType::COMPACTION, _submit_table_compaction_workers, compaction_req); - HANDLE_TYPE(TTaskType::STORAGE_MEDIUM_MIGRATE_V2, _storage_medium_migrate_v2_workers, storage_migration_req_v2); + HANDLE_TYPE(TTaskType::STORAGE_MEDIUM_MIGRATE_V2, _storage_medium_migrate_v2_workers, + storage_migration_req_v2); case TTaskType::REALTIME_PUSH: case TTaskType::PUSH: @@ -212,8 +211,8 @@ void AgentServer::make_snapshot(TAgentResult& t_agent_result, const TSnapshotRequest& snapshot_request) { string snapshot_path; bool allow_incremental_clone = false; - Status err_code = - SnapshotManager::instance()->make_snapshot(snapshot_request, &snapshot_path, &allow_incremental_clone); + Status err_code = SnapshotManager::instance()->make_snapshot(snapshot_request, &snapshot_path, + &allow_incremental_clone); if (!err_code) { LOG(WARNING) << "fail to make_snapshot. tablet_id=" << snapshot_request.tablet_id << ", schema_hash=" << snapshot_request.schema_hash diff --git a/be/src/agent/cgroups_mgr.h b/be/src/agent/cgroups_mgr.h index 065c03acaa..be5cc1c105 100644 --- a/be/src/agent/cgroups_mgr.h +++ b/be/src/agent/cgroups_mgr.h @@ -57,8 +57,8 @@ public: // level_share: a mapping for shares for different levels under the user. // mapping key is level name; value is level's share. Currently, different resource using the same share. Status modify_user_cgroups(const std::string& user_name, - const std::map& user_share, - const std::map& level_share); + const std::map& user_share, + const std::map& level_share); static void apply_cgroup(const std::string& user_name, const std::string& level); @@ -76,7 +76,7 @@ public: // thread_id: the unique id for the thread // user_name&level: the user name and level used to find the cgroup Status assign_thread_to_cgroups(int64_t thread_id, const std::string& user_name, - const std::string& level); + const std::string& level); // Delete the user's cgroups and its sub level cgroups using DropCgroups // Input parameters: @@ -123,11 +123,11 @@ public: // ssd_write_iops: write iops number for ssd disk. // ssd_read_mbps: read bps number for ssd disk, using mb not byte or kb. // ssd_write_mbps: write bps number for ssd disk, using mb not byte or kb. - Status _config_disk_throttle(std::string user_name, std::string level, - int64_t hdd_read_iops, int64_t hdd_write_iops, - int64_t hdd_read_mbps, int64_t hdd_write_mbps, - int64_t ssd_read_iops, int64_t ssd_write_iops, - int64_t ssd_read_mbps, int64_t ssd_write_mbps); + Status _config_disk_throttle(std::string user_name, std::string level, int64_t hdd_read_iops, + int64_t hdd_write_iops, int64_t hdd_read_mbps, + int64_t hdd_write_mbps, int64_t ssd_read_iops, + int64_t ssd_write_iops, int64_t ssd_read_mbps, + int64_t ssd_write_mbps); // echo command in string stream to the cgroup file // Input parameters: diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp index fa2702569d..79a9847eaa 100644 --- a/be/src/agent/heartbeat_server.cpp +++ b/be/src/agent/heartbeat_server.cpp @@ -171,8 +171,8 @@ Status HeartbeatServer::_heartbeat(const TMasterInfo& master_info) { } Status create_heartbeat_server(ExecEnv* exec_env, uint32_t server_port, - ThriftServer** thrift_server, uint32_t worker_thread_num, - TMasterInfo* local_master_info) { + ThriftServer** thrift_server, uint32_t worker_thread_num, + TMasterInfo* local_master_info) { HeartbeatServer* heartbeat_server = new (nothrow) HeartbeatServer(local_master_info); if (heartbeat_server == nullptr) { return Status::InternalError("Get heartbeat server failed"); diff --git a/be/src/agent/heartbeat_server.h b/be/src/agent/heartbeat_server.h index 8c1b12c630..c492fc608c 100644 --- a/be/src/agent/heartbeat_server.h +++ b/be/src/agent/heartbeat_server.h @@ -37,7 +37,7 @@ class ThriftServer; class HeartbeatServer : public HeartbeatServiceIf { public: explicit HeartbeatServer(TMasterInfo* master_info); - virtual ~HeartbeatServer(){}; + virtual ~HeartbeatServer() {}; virtual void init_cluster_id(); @@ -66,7 +66,7 @@ private: }; // class HeartBeatServer Status create_heartbeat_server(ExecEnv* exec_env, uint32_t heartbeat_server_port, - ThriftServer** heart_beat_server, uint32_t worker_thread_num, - TMasterInfo* local_master_info); + ThriftServer** heart_beat_server, uint32_t worker_thread_num, + TMasterInfo* local_master_info); } // namespace doris #endif // DORIS_BE_SRC_AGENT_HEARTBEAT_SERVER_H diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 6b22584153..b9682acd8b 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -371,7 +371,8 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { } else { ++_s_report_version; // get path hash of the created tablet - TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(create_tablet_req.tablet_id); + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( + create_tablet_req.tablet_id); DCHECK(tablet != nullptr); TTabletInfo tablet_info; tablet_info.tablet_id = tablet->table_id(); @@ -963,7 +964,7 @@ void TaskWorkerPool::_storage_medium_migrate_worker_thread_callback() { } Status TaskWorkerPool::_check_migrate_request(const TStorageMediumMigrateReq& req, - TabletSharedPtr& tablet, DataDir** dest_store) { + TabletSharedPtr& tablet, DataDir** dest_store) { int64_t tablet_id = req.tablet_id; tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); if (tablet == nullptr) { @@ -1008,7 +1009,7 @@ Status TaskWorkerPool::_check_migrate_request(const TStorageMediumMigrateReq& re } if (tablet->data_dir()->path() == (*dest_store)->path()) { LOG(INFO) << "tablet is already on specified path. " - << "path=" << tablet->data_dir()->path(); + << "path=" << tablet->data_dir()->path(); return Status::OLAPInternalError(OLAP_REQUEST_FAILED); } @@ -1379,7 +1380,8 @@ void TaskWorkerPool::_make_snapshot_thread_callback() { << ", schema_hash:" << snapshot_request.schema_hash << ", version:" << snapshot_request.version << ", status: " << make_snapshot_status.to_string(); - error_msgs.push_back("make_snapshot failed. status: " + make_snapshot_status.get_error_msg()); + error_msgs.push_back("make_snapshot failed. status: " + + make_snapshot_status.get_error_msg()); } else { LOG(INFO) << "make_snapshot success. tablet_id:" << snapshot_request.tablet_id << ", schema_hash:" << snapshot_request.schema_hash @@ -1399,7 +1401,8 @@ void TaskWorkerPool::_make_snapshot_thread_callback() { << ", schema_hash:" << snapshot_request.schema_hash << ", version:" << snapshot_request.version << ",list file failed: " << st.to_string(); - error_msgs.push_back("make_snapshot failed. list file failed: " + st.get_error_msg()); + error_msgs.push_back("make_snapshot failed. list file failed: " + + st.get_error_msg()); } } } @@ -1471,9 +1474,8 @@ void TaskWorkerPool::_release_snapshot_thread_callback() { } } -Status TaskWorkerPool::_get_tablet_info(const TTabletId tablet_id, - const TSchemaHash schema_hash, int64_t signature, - TTabletInfo* tablet_info) { +Status TaskWorkerPool::_get_tablet_info(const TTabletId tablet_id, const TSchemaHash schema_hash, + int64_t signature, TTabletInfo* tablet_info) { Status status = Status::OK(); tablet_info->__set_tablet_id(tablet_id); tablet_info->__set_schema_hash(schema_hash); @@ -1506,8 +1508,8 @@ void TaskWorkerPool::_move_dir_thread_callback() { } LOG(INFO) << "get move dir task, signature:" << agent_task_req.signature << ", job id:" << move_dir_req.job_id; - Status status = - _move_dir(move_dir_req.tablet_id, move_dir_req.src, move_dir_req.job_id, true /* TODO */); + Status status = _move_dir(move_dir_req.tablet_id, move_dir_req.src, move_dir_req.job_id, + true /* TODO */); if (!status.ok()) { LOG(WARNING) << "failed to move dir: " << move_dir_req.src @@ -1532,9 +1534,9 @@ void TaskWorkerPool::_move_dir_thread_callback() { } } -Status TaskWorkerPool::_move_dir(const TTabletId tablet_id, const std::string& src, int64_t job_id, bool overwrite) { - TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); +Status TaskWorkerPool::_move_dir(const TTabletId tablet_id, const std::string& src, int64_t job_id, + bool overwrite) { + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); if (tablet == nullptr) { LOG(INFO) << "failed to get tablet. tablet_id:" << tablet_id; return Status::InvalidArgument("Could not find tablet"); @@ -1686,12 +1688,13 @@ void TaskWorkerPool::_storage_medium_migrate_v2_worker_thread_callback() { TFinishTaskRequest finish_task_request; TTaskType::type task_type = agent_task_req.task_type; switch (task_type) { - case TTaskType::STORAGE_MEDIUM_MIGRATE_V2: - _storage_medium_migrate_v2(agent_task_req, signature, task_type, &finish_task_request); - break; - default: - // pass - break; + case TTaskType::STORAGE_MEDIUM_MIGRATE_V2: + _storage_medium_migrate_v2(agent_task_req, signature, task_type, + &finish_task_request); + break; + default: + // pass + break; } _finish_task(finish_task_request); } @@ -1699,24 +1702,25 @@ void TaskWorkerPool::_storage_medium_migrate_v2_worker_thread_callback() { } } -void TaskWorkerPool::_storage_medium_migrate_v2(const TAgentTaskRequest& agent_task_req, int64_t signature, - const TTaskType::type task_type, TFinishTaskRequest* finish_task_request) { +void TaskWorkerPool::_storage_medium_migrate_v2(const TAgentTaskRequest& agent_task_req, + int64_t signature, const TTaskType::type task_type, + TFinishTaskRequest* finish_task_request) { Status status = Status::OK(); TStatus task_status; std::vector error_msgs; string process_name; switch (task_type) { - case TTaskType::STORAGE_MEDIUM_MIGRATE_V2: - process_name = "StorageMediumMigrationV2"; - break; - default: - std::string task_name; - EnumToString(TTaskType, task_type, task_name); - LOG(WARNING) << "Storage medium migration v2 type invalid. type: " << task_name - << ", signature: " << signature; - status = Status::NotSupported("Storage medium migration v2 type invalid"); - break; + case TTaskType::STORAGE_MEDIUM_MIGRATE_V2: + process_name = "StorageMediumMigrationV2"; + break; + default: + std::string task_name; + EnumToString(TTaskType, task_type, task_name); + LOG(WARNING) << "Storage medium migration v2 type invalid. type: " << task_name + << ", signature: " << signature; + status = Status::NotSupported("Storage medium migration v2 type invalid"); + break; } // Check last storage medium migration v2 status, if failed delete tablet file diff --git a/be/src/agent/task_worker_pool.h b/be/src/agent/task_worker_pool.h index e413211357..98895fe5c3 100644 --- a/be/src/agent/task_worker_pool.h +++ b/be/src/agent/task_worker_pool.h @@ -199,16 +199,19 @@ private: Status _get_tablet_info(const TTabletId tablet_id, const TSchemaHash schema_hash, int64_t signature, TTabletInfo* tablet_info); - Status _move_dir(const TTabletId tablet_id, const std::string& src, int64_t job_id, bool overwrite); + Status _move_dir(const TTabletId tablet_id, const std::string& src, int64_t job_id, + bool overwrite); Status _check_migrate_request(const TStorageMediumMigrateReq& req, TabletSharedPtr& tablet, - DataDir** dest_store); + DataDir** dest_store); // random sleep 1~second seconds void _random_sleep(int second); void _storage_medium_migrate_v2(const TAgentTaskRequest& agent_task_req, int64_t signature, - const TTaskType::type task_type, TFinishTaskRequest* finish_task_request); + const TTaskType::type task_type, + TFinishTaskRequest* finish_task_request); + private: std::string _name; diff --git a/be/src/agent/user_resource_listener.cpp b/be/src/agent/user_resource_listener.cpp index 8f6ca65c23..56cd0f1407 100644 --- a/be/src/agent/user_resource_listener.cpp +++ b/be/src/agent/user_resource_listener.cpp @@ -52,8 +52,8 @@ void UserResourceListener::handle_update(const TAgentServiceVersion::type& proto if (updates.size() > 0) { int64_t new_version = updates[0].int_value; // Async call to update users resource method - auto res = std::async(std::launch::async, &UserResourceListener::update_users_resource, this, - new_version); + auto res = std::async(std::launch::async, &UserResourceListener::update_users_resource, + this, new_version); res.get(); } } diff --git a/be/src/agent/utils.cpp b/be/src/agent/utils.cpp index 0c850464d7..ae243c2165 100644 --- a/be/src/agent/utils.cpp +++ b/be/src/agent/utils.cpp @@ -41,8 +41,7 @@ MasterServerClient::MasterServerClient(const TMasterInfo& master_info, FrontendServiceClientCache* client_cache) : _master_info(master_info), _client_cache(client_cache) {} -Status MasterServerClient::finish_task(const TFinishTaskRequest& request, - TMasterResult* result) { +Status MasterServerClient::finish_task(const TFinishTaskRequest& request, TMasterResult* result) { Status client_status; FrontendServiceConnection client(_client_cache, _master_info.network_address, config::thrift_rpc_timeout_ms, &client_status); diff --git a/be/src/agent/utils.h b/be/src/agent/utils.h index f5b822463f..1e1299ac2c 100644 --- a/be/src/agent/utils.h +++ b/be/src/agent/utils.h @@ -29,7 +29,7 @@ namespace doris { class MasterServerClient { public: MasterServerClient(const TMasterInfo& master_info, FrontendServiceClientCache* client_cache); - virtual ~MasterServerClient(){}; + virtual ~MasterServerClient() {}; // Report finished task to the master server // @@ -59,8 +59,8 @@ private: class AgentUtils { public: - AgentUtils(){}; - virtual ~AgentUtils(){}; + AgentUtils() {}; + virtual ~AgentUtils() {}; // Execute shell cmd virtual bool exec_cmd(const std::string& command, std::string* errmsg, diff --git a/be/src/common/configbase.cpp b/be/src/common/configbase.cpp index 3258da3e91..a34f6b08ab 100644 --- a/be/src/common/configbase.cpp +++ b/be/src/common/configbase.cpp @@ -233,7 +233,8 @@ bool Properties::load(const char* conf_file, bool must_exist) { } template -bool Properties::get_or_default(const char* key, const char* defstr, T& retval, bool* is_retval_set) const { +bool Properties::get_or_default(const char* key, const char* defstr, T& retval, + bool* is_retval_set) const { const auto& it = file_conf_map.find(std::string(key)); std::string valstr; if (it == file_conf_map.end()) { @@ -295,33 +296,35 @@ std::ostream& operator<<(std::ostream& out, const std::vector& v) { return out; } -#define SET_FIELD(FIELD, TYPE, FILL_CONF_MAP, SET_TO_DEFAULT) \ - if (strcmp((FIELD).type, #TYPE) == 0) { \ - TYPE new_value = TYPE(); \ - bool is_newval_set = false; \ - if (!props.get_or_default((FIELD).name, \ - ((SET_TO_DEFAULT) ? (FIELD).defval : nullptr), new_value, &is_newval_set)) { \ - std::cerr << "config field error: " << (FIELD).name << std::endl; \ - return false; \ - } \ - if (!is_newval_set) { \ - continue; \ - } \ - TYPE& ref_conf_value = *reinterpret_cast((FIELD).storage); \ - TYPE old_value = ref_conf_value; \ - ref_conf_value = new_value; \ - auto validator = RegisterConfValidator::_s_field_validator->find((FIELD).name); \ - if (validator != RegisterConfValidator::_s_field_validator->end() && !(validator->second)()) { \ - ref_conf_value = old_value; \ - std::cerr << "validate " << (FIELD).name << "=" << new_value << " failed" << std::endl; \ - return false; \ - } \ - if (FILL_CONF_MAP) { \ - std::ostringstream oss; \ - oss << ref_conf_value; \ - (*full_conf_map)[(FIELD).name] = oss.str(); \ - } \ - continue; \ +#define SET_FIELD(FIELD, TYPE, FILL_CONF_MAP, SET_TO_DEFAULT) \ + if (strcmp((FIELD).type, #TYPE) == 0) { \ + TYPE new_value = TYPE(); \ + bool is_newval_set = false; \ + if (!props.get_or_default((FIELD).name, ((SET_TO_DEFAULT) ? (FIELD).defval : nullptr), \ + new_value, &is_newval_set)) { \ + std::cerr << "config field error: " << (FIELD).name << std::endl; \ + return false; \ + } \ + if (!is_newval_set) { \ + continue; \ + } \ + TYPE& ref_conf_value = *reinterpret_cast((FIELD).storage); \ + TYPE old_value = ref_conf_value; \ + ref_conf_value = new_value; \ + auto validator = RegisterConfValidator::_s_field_validator->find((FIELD).name); \ + if (validator != RegisterConfValidator::_s_field_validator->end() && \ + !(validator->second)()) { \ + ref_conf_value = old_value; \ + std::cerr << "validate " << (FIELD).name << "=" << new_value << " failed" \ + << std::endl; \ + return false; \ + } \ + if (FILL_CONF_MAP) { \ + std::ostringstream oss; \ + oss << ref_conf_value; \ + (*full_conf_map)[(FIELD).name] = oss.str(); \ + } \ + continue; \ } // init conf fields @@ -355,32 +358,33 @@ bool init(const char* conf_file, bool fill_conf_map, bool must_exist, bool set_t return true; } -#define UPDATE_FIELD(FIELD, VALUE, TYPE, PERSIST) \ - if (strcmp((FIELD).type, #TYPE) == 0) { \ - TYPE new_value; \ - if (!convert((VALUE), new_value)) { \ - return Status::InvalidArgument( \ - strings::Substitute("convert '$0' as $1 failed", VALUE, #TYPE)); \ - } \ - TYPE& ref_conf_value = *reinterpret_cast((FIELD).storage); \ - TYPE old_value = ref_conf_value; \ - ref_conf_value = new_value; \ - auto validator = RegisterConfValidator::_s_field_validator->find((FIELD).name); \ - if (validator != RegisterConfValidator::_s_field_validator->end() && !(validator->second)()) { \ - ref_conf_value = old_value; \ - return Status::InvalidArgument( \ - strings::Substitute("validate $0=$1 failed", (FIELD).name, new_value)); \ - } \ - ref_conf_value = new_value; \ - if (full_conf_map != nullptr) { \ - std::ostringstream oss; \ - oss << new_value; \ - (*full_conf_map)[(FIELD).name] = oss.str(); \ - } \ - if (PERSIST) { \ - persist_config(std::string((FIELD).name), VALUE); \ - } \ - return Status::OK(); \ +#define UPDATE_FIELD(FIELD, VALUE, TYPE, PERSIST) \ + if (strcmp((FIELD).type, #TYPE) == 0) { \ + TYPE new_value; \ + if (!convert((VALUE), new_value)) { \ + return Status::InvalidArgument( \ + strings::Substitute("convert '$0' as $1 failed", VALUE, #TYPE)); \ + } \ + TYPE& ref_conf_value = *reinterpret_cast((FIELD).storage); \ + TYPE old_value = ref_conf_value; \ + ref_conf_value = new_value; \ + auto validator = RegisterConfValidator::_s_field_validator->find((FIELD).name); \ + if (validator != RegisterConfValidator::_s_field_validator->end() && \ + !(validator->second)()) { \ + ref_conf_value = old_value; \ + return Status::InvalidArgument( \ + strings::Substitute("validate $0=$1 failed", (FIELD).name, new_value)); \ + } \ + ref_conf_value = new_value; \ + if (full_conf_map != nullptr) { \ + std::ostringstream oss; \ + oss << new_value; \ + (*full_conf_map)[(FIELD).name] = oss.str(); \ + } \ + if (PERSIST) { \ + persist_config(std::string((FIELD).name), VALUE); \ + } \ + return Status::OK(); \ } // write config to be_custom.conf @@ -427,7 +431,9 @@ Status set_config(const std::string& field, const std::string& value, bool need_ "'$0' is type of '$1' which is not support to modify", field, it->second.type)); } -std::mutex* get_mutable_string_config_lock() { return &mutable_string_config_lock; } +std::mutex* get_mutable_string_config_lock() { + return &mutable_string_config_lock; +} std::vector> get_config_info() { std::vector> configs; @@ -443,7 +449,7 @@ std::vector> get_config_info() { _config.push_back(field_it->second.type); _config.push_back(it.second); - _config.push_back(field_it->second.valmutable ? "true":"false"); + _config.push_back(field_it->second.valmutable ? "true" : "false"); configs.push_back(_config); } diff --git a/be/src/common/configbase.h b/be/src/common/configbase.h index 6773e367ee..ff41c1bcbb 100644 --- a/be/src/common/configbase.h +++ b/be/src/common/configbase.h @@ -60,7 +60,6 @@ public: Field field(ftype, fname, fstorage, fdefval, fvalmutable); _s_field_map->insert(std::make_pair(std::string(fname), field)); } - }; // RegisterConfValidator class is used to store validator function of registered config fields in @@ -90,10 +89,10 @@ public: #define DECLARE_FIELD(FIELD_TYPE, FIELD_NAME) extern FIELD_TYPE FIELD_NAME; -#define DEFINE_VALIDATOR(FIELD_NAME, VALIDATOR) \ - static auto validator_##FIELD_NAME = VALIDATOR; \ - static RegisterConfValidator reg_validator_##FIELD_NAME(#FIELD_NAME, \ - []() -> bool { return validator_##FIELD_NAME(FIELD_NAME); }); +#define DEFINE_VALIDATOR(FIELD_NAME, VALIDATOR) \ + static auto validator_##FIELD_NAME = VALIDATOR; \ + static RegisterConfValidator reg_validator_##FIELD_NAME( \ + #FIELD_NAME, []() -> bool { return validator_##FIELD_NAME(FIELD_NAME); }); #define DECLARE_VALIDATOR(FIELD_NAME) ; diff --git a/be/src/common/signal_handler.h b/be/src/common/signal_handler.h index 9faae8cc88..591dc8b6ef 100644 --- a/be/src/common/signal_handler.h +++ b/be/src/common/signal_handler.h @@ -38,10 +38,10 @@ #include #include #ifdef HAVE_UCONTEXT_H -# include +#include #endif #ifdef HAVE_SYS_UCONTEXT_H -# include +#include #endif #include @@ -55,15 +55,11 @@ namespace { // // The list should be synced with the comment in signalhandler.h. const struct { - int number; - const char *name; + int number; + const char* name; } kFailureSignals[] = { - { SIGSEGV, "SIGSEGV" }, - { SIGILL, "SIGILL" }, - { SIGFPE, "SIGFPE" }, - { SIGABRT, "SIGABRT" }, - { SIGBUS, "SIGBUS" }, - { SIGTERM, "SIGTERM" }, + {SIGSEGV, "SIGSEGV"}, {SIGILL, "SIGILL"}, {SIGFPE, "SIGFPE"}, + {SIGABRT, "SIGABRT"}, {SIGBUS, "SIGBUS"}, {SIGTERM, "SIGTERM"}, }; static bool kFailureSignalHandlerInstalled = false; @@ -85,163 +81,160 @@ static bool kFailureSignalHandlerInstalled = false; * These signal explainer is copied from Meta's Folly */ const char* sigill_reason(int si_code) { - switch (si_code) { + switch (si_code) { case ILL_ILLOPC: - return "illegal opcode"; + return "illegal opcode"; case ILL_ILLOPN: - return "illegal operand"; + return "illegal operand"; case ILL_ILLADR: - return "illegal addressing mode"; + return "illegal addressing mode"; case ILL_ILLTRP: - return "illegal trap"; + return "illegal trap"; case ILL_PRVOPC: - return "privileged opcode"; + return "privileged opcode"; case ILL_PRVREG: - return "privileged register"; + return "privileged register"; case ILL_COPROC: - return "coprocessor error"; + return "coprocessor error"; case ILL_BADSTK: - return "internal stack error"; + return "internal stack error"; default: - return nullptr; - } + return nullptr; + } } const char* sigfpe_reason(int si_code) { - switch (si_code) { + switch (si_code) { case FPE_INTDIV: - return "integer divide by zero"; + return "integer divide by zero"; case FPE_INTOVF: - return "integer overflow"; + return "integer overflow"; case FPE_FLTDIV: - return "floating-point divide by zero"; + return "floating-point divide by zero"; case FPE_FLTOVF: - return "floating-point overflow"; + return "floating-point overflow"; case FPE_FLTUND: - return "floating-point underflow"; + return "floating-point underflow"; case FPE_FLTRES: - return "floating-point inexact result"; + return "floating-point inexact result"; case FPE_FLTINV: - return "floating-point invalid operation"; + return "floating-point invalid operation"; case FPE_FLTSUB: - return "subscript out of range"; + return "subscript out of range"; default: - return nullptr; - } + return nullptr; + } } const char* sigsegv_reason(int si_code) { - switch (si_code) { + switch (si_code) { case SEGV_MAPERR: - return "address not mapped to object"; + return "address not mapped to object"; case SEGV_ACCERR: - return "invalid permissions for mapped object"; + return "invalid permissions for mapped object"; default: - return nullptr; - } + return nullptr; + } } const char* sigbus_reason(int si_code) { - switch (si_code) { + switch (si_code) { case BUS_ADRALN: - return "invalid address alignment"; + return "invalid address alignment"; case BUS_ADRERR: - return "nonexistent physical address"; + return "nonexistent physical address"; case BUS_OBJERR: - return "object-specific hardware error"; + return "object-specific hardware error"; - // MCEERR_AR and MCEERR_AO: in sigaction(2) but not in headers. + // MCEERR_AR and MCEERR_AO: in sigaction(2) but not in headers. default: - return nullptr; - } + return nullptr; + } } const char* signal_reason(int signum, int si_code) { - switch (signum) { + switch (signum) { case SIGILL: - return sigill_reason(si_code); + return sigill_reason(si_code); case SIGFPE: - return sigfpe_reason(si_code); + return sigfpe_reason(si_code); case SIGSEGV: - return sigsegv_reason(si_code); + return sigsegv_reason(si_code); case SIGBUS: - return sigbus_reason(si_code); + return sigbus_reason(si_code); default: - return nullptr; - } + return nullptr; + } } // The class is used for formatting error messages. We don't use printf() // as it's not async signal safe. class MinimalFormatter { - public: - MinimalFormatter(char *buffer, size_t size) - : buffer_(buffer), - cursor_(buffer), - end_(buffer + size) { - } +public: + MinimalFormatter(char* buffer, size_t size) + : buffer_(buffer), cursor_(buffer), end_(buffer + size) {} - // Returns the number of bytes written in the buffer. - std::size_t num_bytes_written() const { return static_cast(cursor_ - buffer_); } + // Returns the number of bytes written in the buffer. + std::size_t num_bytes_written() const { return static_cast(cursor_ - buffer_); } - // Appends string from "str" and updates the internal cursor. - void AppendString(const char* str) { - ptrdiff_t i = 0; - while (str[i] != '\0' && cursor_ + i < end_) { - cursor_[i] = str[i]; - ++i; + // Appends string from "str" and updates the internal cursor. + void AppendString(const char* str) { + ptrdiff_t i = 0; + while (str[i] != '\0' && cursor_ + i < end_) { + cursor_[i] = str[i]; + ++i; + } + cursor_ += i; } - cursor_ += i; - } - // Formats "number" in "radix" and updates the internal cursor. - // Lowercase letters are used for 'a' - 'z'. - void AppendUint64(uint64 number, unsigned radix) { - unsigned i = 0; - while (cursor_ + i < end_) { - const uint64 tmp = number % radix; - number /= radix; - cursor_[i] = static_cast(tmp < 10 ? '0' + tmp : 'a' + tmp - 10); - ++i; - if (number == 0) { - break; - } + // Formats "number" in "radix" and updates the internal cursor. + // Lowercase letters are used for 'a' - 'z'. + void AppendUint64(uint64 number, unsigned radix) { + unsigned i = 0; + while (cursor_ + i < end_) { + const uint64 tmp = number % radix; + number /= radix; + cursor_[i] = static_cast(tmp < 10 ? '0' + tmp : 'a' + tmp - 10); + ++i; + if (number == 0) { + break; + } + } + // Reverse the bytes written. + std::reverse(cursor_, cursor_ + i); + cursor_ += i; } - // Reverse the bytes written. - std::reverse(cursor_, cursor_ + i); - cursor_ += i; - } - // Formats "number" as hexadecimal number, and updates the internal - // cursor. Padding will be added in front if needed. - void AppendHexWithPadding(uint64 number, int width) { - char* start = cursor_; - AppendString("0x"); - AppendUint64(number, 16); - // Move to right and add padding in front if needed. - if (cursor_ < start + width) { - const int64 delta = start + width - cursor_; - std::copy(start, cursor_, start + delta); - std::fill(start, start + delta, ' '); - cursor_ = start + width; + // Formats "number" as hexadecimal number, and updates the internal + // cursor. Padding will be added in front if needed. + void AppendHexWithPadding(uint64 number, int width) { + char* start = cursor_; + AppendString("0x"); + AppendUint64(number, 16); + // Move to right and add padding in front if needed. + if (cursor_ < start + width) { + const int64 delta = start + width - cursor_; + std::copy(start, cursor_, start + delta); + std::fill(start, start + delta, ' '); + cursor_ = start + width; + } } - } - private: - char *buffer_; - char *cursor_; - const char * const end_; +private: + char* buffer_; + char* cursor_; + const char* const end_; }; // Writes the given data with the size to the standard error. void WriteToStderr(const char* data, size_t size) { - if (write(STDERR_FILENO, data, size) < 0) { - // Ignore errors. - } + if (write(STDERR_FILENO, data, size) < 0) { + // Ignore errors. + } } // The writer function can be changed by InstallFailureWriter(). @@ -250,80 +243,79 @@ void (*g_failure_writer)(const char* data, size_t size) = WriteToStderr; // Dumps time information. We don't dump human-readable time information // as localtime() is not guaranteed to be async signal safe. void DumpTimeInfo() { - time_t time_in_sec = time(NULL); - char buf[256]; // Big enough for time info. - MinimalFormatter formatter(buf, sizeof(buf)); - formatter.AppendString("*** Aborted at "); - formatter.AppendUint64(static_cast(time_in_sec), 10); - formatter.AppendString(" (unix time)"); - formatter.AppendString(" try \"date -d @"); - formatter.AppendUint64(static_cast(time_in_sec), 10); - formatter.AppendString("\" if you are using GNU date ***\n"); - g_failure_writer(buf, formatter.num_bytes_written()); + time_t time_in_sec = time(NULL); + char buf[256]; // Big enough for time info. + MinimalFormatter formatter(buf, sizeof(buf)); + formatter.AppendString("*** Aborted at "); + formatter.AppendUint64(static_cast(time_in_sec), 10); + formatter.AppendString(" (unix time)"); + formatter.AppendString(" try \"date -d @"); + formatter.AppendUint64(static_cast(time_in_sec), 10); + formatter.AppendString("\" if you are using GNU date ***\n"); + g_failure_writer(buf, formatter.num_bytes_written()); } // Dumps information about the signal to STDERR. -void DumpSignalInfo(int signal_number, siginfo_t *siginfo) { - // Get the signal name. - const char* signal_name = NULL; - for (size_t i = 0; i < ARRAYSIZE(kFailureSignals); ++i) { - if (signal_number == kFailureSignals[i].number) { - signal_name = kFailureSignals[i].name; +void DumpSignalInfo(int signal_number, siginfo_t* siginfo) { + // Get the signal name. + const char* signal_name = NULL; + for (size_t i = 0; i < ARRAYSIZE(kFailureSignals); ++i) { + if (signal_number == kFailureSignals[i].number) { + signal_name = kFailureSignals[i].name; + } } - } - char buf[256]; // Big enough for signal info. - MinimalFormatter formatter(buf, sizeof(buf)); + char buf[256]; // Big enough for signal info. + MinimalFormatter formatter(buf, sizeof(buf)); - formatter.AppendString("*** "); - if (signal_name) { - formatter.AppendString(signal_name); - } else { - // Use the signal number if the name is unknown. The signal name - // should be known, but just in case. - formatter.AppendString("Signal "); - formatter.AppendUint64(static_cast(signal_number), 10); - } - formatter.AppendString(" "); - // Detail reason explain - auto reason = signal_reason(signal_number, siginfo->si_code); + formatter.AppendString("*** "); + if (signal_name) { + formatter.AppendString(signal_name); + } else { + // Use the signal number if the name is unknown. The signal name + // should be known, but just in case. + formatter.AppendString("Signal "); + formatter.AppendUint64(static_cast(signal_number), 10); + } + formatter.AppendString(" "); + // Detail reason explain + auto reason = signal_reason(signal_number, siginfo->si_code); - // If we can't find a reason code make a best effort to print the (int) code. - if (reason != nullptr) { - formatter.AppendString(reason); - } else { - formatter.AppendString("unkown detail explain"); - } - formatter.AppendString(" (@0x"); - formatter.AppendUint64(reinterpret_cast(siginfo->si_addr), 16); - formatter.AppendString(")"); - formatter.AppendString(" received by PID "); - formatter.AppendUint64(static_cast(getpid()), 10); - formatter.AppendString(" (TID 0x"); - // We assume pthread_t is an integral number or a pointer, rather - // than a complex struct. In some environments, pthread_self() - // returns an uint64 but in some other environments pthread_self() - // returns a pointer. - pthread_t id = pthread_self(); - formatter.AppendUint64( - reinterpret_cast(reinterpret_cast(id)), 16); - formatter.AppendString(") "); - // Only linux has the PID of the signal sender in si_pid. - formatter.AppendString("from PID "); - formatter.AppendUint64(static_cast(siginfo->si_pid), 10); - formatter.AppendString("; "); - formatter.AppendString("stack trace: ***\n"); - g_failure_writer(buf, formatter.num_bytes_written()); + // If we can't find a reason code make a best effort to print the (int) code. + if (reason != nullptr) { + formatter.AppendString(reason); + } else { + formatter.AppendString("unkown detail explain"); + } + formatter.AppendString(" (@0x"); + formatter.AppendUint64(reinterpret_cast(siginfo->si_addr), 16); + formatter.AppendString(")"); + formatter.AppendString(" received by PID "); + formatter.AppendUint64(static_cast(getpid()), 10); + formatter.AppendString(" (TID 0x"); + // We assume pthread_t is an integral number or a pointer, rather + // than a complex struct. In some environments, pthread_self() + // returns an uint64 but in some other environments pthread_self() + // returns a pointer. + pthread_t id = pthread_self(); + formatter.AppendUint64(reinterpret_cast(reinterpret_cast(id)), 16); + formatter.AppendString(") "); + // Only linux has the PID of the signal sender in si_pid. + formatter.AppendString("from PID "); + formatter.AppendUint64(static_cast(siginfo->si_pid), 10); + formatter.AppendString("; "); + formatter.AppendString("stack trace: ***\n"); + g_failure_writer(buf, formatter.num_bytes_written()); } // Invoke the default signal handler. void InvokeDefaultSignalHandler(int signal_number) { - struct sigaction sig_action; - memset(&sig_action, 0, sizeof(sig_action)); - sigemptyset(&sig_action.sa_mask); - sig_action.sa_handler = SIG_DFL; - sigaction(signal_number, &sig_action, NULL); - kill(getpid(), signal_number); + struct sigaction sig_action; + memset(&sig_action, 0, sizeof(sig_action)); + sigemptyset(&sig_action.sa_mask); + sig_action.sa_handler = SIG_DFL; + sigaction(signal_number, &sig_action, NULL); + kill(getpid(), signal_number); } // This variable is used for protecting FailureSignalHandler() from @@ -336,110 +328,104 @@ static pthread_t* g_entered_thread_id_pointer = NULL; // defined, we try the CPU specific logics (we only support x86 and // x86_64 for now) first, then use a naive implementation, which has a // race condition. -template +template inline T sync_val_compare_and_swap(T* ptr, T oldval, T newval) { #if defined(HAVE___SYNC_VAL_COMPARE_AND_SWAP) - return __sync_val_compare_and_swap(ptr, oldval, newval); + return __sync_val_compare_and_swap(ptr, oldval, newval); #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) - T ret; - __asm__ __volatile__("lock; cmpxchg %1, (%2);" - :"=a"(ret) - // GCC may produces %sil or %dil for - // constraint "r", but some of apple's gas - // dosn't know the 8 bit registers. - // We use "q" to avoid these registers. - :"q"(newval), "q"(ptr), "a"(oldval) - :"memory", "cc"); - return ret; + T ret; + __asm__ __volatile__("lock; cmpxchg %1, (%2);" + : "=a"(ret) + // GCC may produces %sil or %dil for + // constraint "r", but some of apple's gas + // dosn't know the 8 bit registers. + // We use "q" to avoid these registers. + : "q"(newval), "q"(ptr), "a"(oldval) + : "memory", "cc"); + return ret; #else - T ret = *ptr; - if (ret == oldval) { - *ptr = newval; - } - return ret; + T ret = *ptr; + if (ret == oldval) { + *ptr = newval; + } + return ret; #endif } // Dumps signal and stack frame information, and invokes the default // signal handler once our job is done. -void FailureSignalHandler(int signal_number, - siginfo_t *signal_info, - void *ucontext) -{ - // First check if we've already entered the function. We use an atomic - // compare and swap operation for platforms that support it. For other - // platforms, we use a naive method that could lead to a subtle race. +void FailureSignalHandler(int signal_number, siginfo_t* signal_info, void* ucontext) { + // First check if we've already entered the function. We use an atomic + // compare and swap operation for platforms that support it. For other + // platforms, we use a naive method that could lead to a subtle race. - // We assume pthread_self() is async signal safe, though it's not - // officially guaranteed. - pthread_t my_thread_id = pthread_self(); - // NOTE: We could simply use pthread_t rather than pthread_t* for this, - // if pthread_self() is guaranteed to return non-zero value for thread - // ids, but there is no such guarantee. We need to distinguish if the - // old value (value returned from __sync_val_compare_and_swap) is - // different from the original value (in this case NULL). - pthread_t* old_thread_id_pointer = - sync_val_compare_and_swap( - &g_entered_thread_id_pointer, - static_cast(NULL), - &my_thread_id); - if (old_thread_id_pointer != NULL) { - // We've already entered the signal handler. What should we do? - if (pthread_equal(my_thread_id, *g_entered_thread_id_pointer)) { - // It looks the current thread is reentering the signal handler. - // Something must be going wrong (maybe we are reentering by another - // type of signal?). Kill ourself by the default signal handler. - InvokeDefaultSignalHandler(signal_number); + // We assume pthread_self() is async signal safe, though it's not + // officially guaranteed. + pthread_t my_thread_id = pthread_self(); + // NOTE: We could simply use pthread_t rather than pthread_t* for this, + // if pthread_self() is guaranteed to return non-zero value for thread + // ids, but there is no such guarantee. We need to distinguish if the + // old value (value returned from __sync_val_compare_and_swap) is + // different from the original value (in this case NULL). + pthread_t* old_thread_id_pointer = sync_val_compare_and_swap( + &g_entered_thread_id_pointer, static_cast(NULL), &my_thread_id); + if (old_thread_id_pointer != NULL) { + // We've already entered the signal handler. What should we do? + if (pthread_equal(my_thread_id, *g_entered_thread_id_pointer)) { + // It looks the current thread is reentering the signal handler. + // Something must be going wrong (maybe we are reentering by another + // type of signal?). Kill ourself by the default signal handler. + InvokeDefaultSignalHandler(signal_number); + } + // Another thread is dumping stuff. Let's wait until that thread + // finishes the job and kills the process. + while (true) { + sleep(1); + } } - // Another thread is dumping stuff. Let's wait until that thread - // finishes the job and kills the process. - while (true) { - sleep(1); - } - } - // This is the first time we enter the signal handler. We are going to - // do some interesting stuff from here. - // TODO(satorux): We might want to set timeout here using alarm(), but - // mixing alarm() and sleep() can be a bad idea. + // This is the first time we enter the signal handler. We are going to + // do some interesting stuff from here. + // TODO(satorux): We might want to set timeout here using alarm(), but + // mixing alarm() and sleep() can be a bad idea. - // First dump time info. - DumpTimeInfo(); - DumpSignalInfo(signal_number, signal_info); + // First dump time info. + DumpTimeInfo(); + DumpSignalInfo(signal_number, signal_info); - // *** TRANSITION *** - // - // BEFORE this point, all code must be async-termination-safe! - // (See WARNING above.) - // - // AFTER this point, we do unsafe things, like using LOG()! - // The process could be terminated or hung at any time. We try to - // do more useful things first and riskier things later. + // *** TRANSITION *** + // + // BEFORE this point, all code must be async-termination-safe! + // (See WARNING above.) + // + // AFTER this point, we do unsafe things, like using LOG()! + // The process could be terminated or hung at any time. We try to + // do more useful things first and riskier things later. - // Use boost stacktrace to print more detail info - std::cout << boost::stacktrace::stacktrace() << std::endl; + // Use boost stacktrace to print more detail info + std::cout << boost::stacktrace::stacktrace() << std::endl; - // Flush the logs before we do anything in case 'anything' - // causes problems. - google::FlushLogFilesUnsafe(0); + // Flush the logs before we do anything in case 'anything' + // causes problems. + google::FlushLogFilesUnsafe(0); - // Kill ourself by the default signal handler. - InvokeDefaultSignalHandler(signal_number); + // Kill ourself by the default signal handler. + InvokeDefaultSignalHandler(signal_number); } -} // namespace +} // namespace void InstallFailureSignalHandler() { - // Build the sigaction struct. - struct sigaction sig_action; - memset(&sig_action, 0, sizeof(sig_action)); - sigemptyset(&sig_action.sa_mask); - sig_action.sa_flags |= SA_SIGINFO; - sig_action.sa_sigaction = &FailureSignalHandler; + // Build the sigaction struct. + struct sigaction sig_action; + memset(&sig_action, 0, sizeof(sig_action)); + sigemptyset(&sig_action.sa_mask); + sig_action.sa_flags |= SA_SIGINFO; + sig_action.sa_sigaction = &FailureSignalHandler; - for (size_t i = 0; i < ARRAYSIZE(kFailureSignals); ++i) { - CHECK_ERR(sigaction(kFailureSignals[i].number, &sig_action, NULL)); - } - kFailureSignalHandlerInstalled = true; + for (size_t i = 0; i < ARRAYSIZE(kFailureSignals); ++i) { + CHECK_ERR(sigaction(kFailureSignals[i].number, &sig_action, NULL)); + } + kFailureSignalHandlerInstalled = true; } -} // namespace doris +} // namespace doris::signal diff --git a/be/src/common/status.cpp b/be/src/common/status.cpp index bb283e11ea..ec120dc396 100644 --- a/be/src/common/status.cpp +++ b/be/src/common/status.cpp @@ -15,20 +15,22 @@ struct ErrorCodeState { int16_t error_code = 0; bool stacktrace = true; std::string description; - size_t count = 0; // Used for count the number of error happens - std::mutex mutex; // lock guard for count state + size_t count = 0; // Used for count the number of error happens + std::mutex mutex; // lock guard for count state }; ErrorCodeState error_states[MAX_ERROR_NUM]; class Initializer { public: Initializer() { - #define M(NAME, ERRORCODE, DESC, STACKTRACEENABLED) error_states[abs(ERRORCODE)].stacktrace = STACKTRACEENABLED; +#define M(NAME, ERRORCODE, DESC, STACKTRACEENABLED) \ + error_states[abs(ERRORCODE)].stacktrace = STACKTRACEENABLED; APPLY_FOR_ERROR_CODES(M) - #undef M - // Currently, most of description is empty, so that we use NAME as description - #define M(NAME, ERRORCODE, DESC, STACKTRACEENABLED) error_states[abs(ERRORCODE)].description = #NAME; +#undef M +// Currently, most of description is empty, so that we use NAME as description +#define M(NAME, ERRORCODE, DESC, STACKTRACEENABLED) \ + error_states[abs(ERRORCODE)].description = #NAME; APPLY_FOR_ERROR_CODES(M) - #undef M +#undef M } }; Initializer init; // Used to init the error_states array @@ -47,7 +49,7 @@ Status::Status(const TStatus& s) { } } -// TODO yiguolei, maybe should init PStatus's precise code because OLAPInternal Error may +// TODO yiguolei, maybe should init PStatus's precise code because OLAPInternal Error may // tranfer precise code during BRPC Status::Status(const PStatus& s) { TStatusCode::type code = (TStatusCode::type)s.status_code(); @@ -66,15 +68,15 @@ Status::Status(const PStatus& s) { // Implement it here to remove the boost header file from status.h to reduce precompile time Status Status::ConstructErrorStatus(int16_t precise_code, const Slice& msg) { - // This will print all error status's stack, it maybe too many, but it is just used for debug - #ifdef PRINT_ALL_ERR_STATUS_STACKTRACE +// This will print all error status's stack, it maybe too many, but it is just used for debug +#ifdef PRINT_ALL_ERR_STATUS_STACKTRACE LOG(WARNING) << "Error occurred, error code = " << precise_code << ", with message: " << msg - << "\n caused by:" << boost::stacktrace::stacktrace(); - #endif + << "\n caused by:" << boost::stacktrace::stacktrace(); +#endif if (error_states[abs(precise_code)].stacktrace) { // Add stacktrace as part of message, could use LOG(WARN) << "" << status will print both // the error message and the stacktrace - return Status(TStatusCode::INTERNAL_ERROR, msg, precise_code, + return Status(TStatusCode::INTERNAL_ERROR, msg, precise_code, boost::stacktrace::to_string(boost::stacktrace::stacktrace())); } else { return Status(TStatusCode::INTERNAL_ERROR, msg, precise_code, Slice()); diff --git a/be/src/common/status.h b/be/src/common/status.h index 1ebbd913e3..60ebfc3c4b 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -21,217 +21,217 @@ namespace doris { // ErrorName, ErrorCode, String Description, Should print stacktrace -#define APPLY_FOR_ERROR_CODES(M) \ - M(OLAP_SUCCESS, 0, "", false) \ - M(OLAP_ERR_OTHER_ERROR, -1, "", true) \ - M(OLAP_REQUEST_FAILED, -2, "", true) \ - M(OLAP_ERR_OS_ERROR, -100, "", true) \ - M(OLAP_ERR_DIR_NOT_EXIST, -101, "", true) \ - M(OLAP_ERR_FILE_NOT_EXIST, -102, "", true) \ - M(OLAP_ERR_CREATE_FILE_ERROR, -103, "", true) \ - M(OLAP_ERR_MALLOC_ERROR, -104, "", true) \ - M(OLAP_ERR_STL_ERROR, -105, "", true) \ - M(OLAP_ERR_IO_ERROR, -106, "", true) \ - M(OLAP_ERR_MUTEX_ERROR, -107, "", true) \ - M(OLAP_ERR_PTHREAD_ERROR, -108, "", true) \ - M(OLAP_ERR_NETWORK_ERROR, -109, "", true) \ - M(OLAP_ERR_UB_FUNC_ERROR, -110, "", true) \ - M(OLAP_ERR_COMPRESS_ERROR, -111, "", true) \ - M(OLAP_ERR_DECOMPRESS_ERROR, -112, "", true) \ - M(OLAP_ERR_UNKNOWN_COMPRESSION_TYPE, -113, "", true) \ - M(OLAP_ERR_MMAP_ERROR, -114, "", true) \ - M(OLAP_ERR_RWLOCK_ERROR, -115, "", true) \ - M(OLAP_ERR_READ_UNENOUGH, -116, "", true) \ - M(OLAP_ERR_CANNOT_CREATE_DIR, -117, "", true) \ - M(OLAP_ERR_UB_NETWORK_ERROR, -118, "", true) \ - M(OLAP_ERR_FILE_FORMAT_ERROR, -119, "", true) \ - M(OLAP_ERR_EVAL_CONJUNCTS_ERROR, -120, "", true) \ - M(OLAP_ERR_COPY_FILE_ERROR, -121, "", true) \ - M(OLAP_ERR_FILE_ALREADY_EXIST, -122, "", true) \ - M(OLAP_ERR_NOT_INITED, -200, "", true) \ - M(OLAP_ERR_FUNC_NOT_IMPLEMENTED, -201, "", true) \ - M(OLAP_ERR_CALL_SEQUENCE_ERROR, -202, "", true) \ - M(OLAP_ERR_INPUT_PARAMETER_ERROR, -203, "", true) \ - M(OLAP_ERR_BUFFER_OVERFLOW, -204, "", true) \ - M(OLAP_ERR_CONFIG_ERROR, -205, "", true) \ - M(OLAP_ERR_INIT_FAILED, -206, "", true) \ - M(OLAP_ERR_INVALID_SCHEMA, -207, "", true) \ - M(OLAP_ERR_CHECKSUM_ERROR, -208, "", true) \ - M(OLAP_ERR_SIGNATURE_ERROR, -209, "", true) \ - M(OLAP_ERR_CATCH_EXCEPTION, -210, "", true) \ - M(OLAP_ERR_PARSE_PROTOBUF_ERROR, -211, "", true) \ - M(OLAP_ERR_SERIALIZE_PROTOBUF_ERROR, -212, "", true) \ - M(OLAP_ERR_WRITE_PROTOBUF_ERROR, -213, "", true) \ - M(OLAP_ERR_VERSION_NOT_EXIST, -214, "", true) \ - M(OLAP_ERR_TABLE_NOT_FOUND, -215, "", true) \ - M(OLAP_ERR_TRY_LOCK_FAILED, -216, "", true) \ - M(OLAP_ERR_OUT_OF_BOUND, -218, "", true) \ - M(OLAP_ERR_UNDERFLOW, -219, "", true) \ - M(OLAP_ERR_FILE_DATA_ERROR, -220, "", true) \ - M(OLAP_ERR_TEST_FILE_ERROR, -221, "", true) \ - M(OLAP_ERR_INVALID_ROOT_PATH, -222, "", true) \ - M(OLAP_ERR_NO_AVAILABLE_ROOT_PATH, -223, "", true) \ - M(OLAP_ERR_CHECK_LINES_ERROR, -224, "", true) \ - M(OLAP_ERR_INVALID_CLUSTER_INFO, -225, "", true) \ - M(OLAP_ERR_TRANSACTION_NOT_EXIST, -226, "", true) \ - M(OLAP_ERR_DISK_FAILURE, -227, "", true) \ - M(OLAP_ERR_TRANSACTION_ALREADY_COMMITTED, -228, "", true) \ - M(OLAP_ERR_TRANSACTION_ALREADY_VISIBLE, -229, "", true) \ - M(OLAP_ERR_VERSION_ALREADY_MERGED, -230, "", true) \ - M(OLAP_ERR_LZO_DISABLED, -231, "", true) \ - M(OLAP_ERR_DISK_REACH_CAPACITY_LIMIT, -232, "", true) \ - M(OLAP_ERR_TOO_MANY_TRANSACTIONS, -233, "", true) \ - M(OLAP_ERR_INVALID_SNAPSHOT_VERSION, -234, "", true) \ - M(OLAP_ERR_TOO_MANY_VERSION, -235, "", true) \ - M(OLAP_ERR_NOT_INITIALIZED, -236, "", true) \ - M(OLAP_ERR_ALREADY_CANCELLED, -237, "", true) \ - M(OLAP_ERR_TOO_MANY_SEGMENTS, -238, "", true) \ - M(OLAP_ERR_CE_CMD_PARAMS_ERROR, -300, "", true) \ - M(OLAP_ERR_CE_BUFFER_TOO_SMALL, -301, "", true) \ - M(OLAP_ERR_CE_CMD_NOT_VALID, -302, "", true) \ - M(OLAP_ERR_CE_LOAD_TABLE_ERROR, -303, "", true) \ - M(OLAP_ERR_CE_NOT_FINISHED, -304, "", true) \ - M(OLAP_ERR_CE_TABLET_ID_EXIST, -305, "", true) \ - M(OLAP_ERR_CE_TRY_CE_LOCK_ERROR, -306, "", true) \ - M(OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR, -400, "", true) \ - M(OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR, -401, "", true) \ - M(OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR, -402, "", true) \ - M(OLAP_ERR_TABLE_INDEX_FIND_ERROR, -403, "", true) \ - M(OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR, -404, "", true) \ - M(OLAP_ERR_TABLE_CREATE_META_ERROR, -405, "", true) \ - M(OLAP_ERR_TABLE_ALREADY_DELETED_ERROR, -406, "", true) \ - M(OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE, -500, "", true) \ - M(OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE, -501, "", true) \ - M(OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR, -502, "", true) \ - M(OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR, -503, "", true) \ - M(OLAP_ERR_DELETE_VERSION_ERROR, -504, "", true) \ - M(OLAP_ERR_GC_SCAN_PATH_ERROR, -505, "", true) \ - M(OLAP_ERR_ENGINE_INSERT_OLD_TABLET, -506, "", true) \ - M(OLAP_ERR_FETCH_OTHER_ERROR, -600, "", true) \ - M(OLAP_ERR_FETCH_TABLE_NOT_EXIST, -601, "", true) \ - M(OLAP_ERR_FETCH_VERSION_ERROR, -602, "", true) \ - M(OLAP_ERR_FETCH_SCHEMA_ERROR, -603, "", true) \ - M(OLAP_ERR_FETCH_COMPRESSION_ERROR, -604, "", true) \ - M(OLAP_ERR_FETCH_CONTEXT_NOT_EXIST, -605, "", true) \ - M(OLAP_ERR_FETCH_GET_READER_PARAMS_ERR, -606, "", true) \ - M(OLAP_ERR_FETCH_SAVE_SESSION_ERR, -607, "", true) \ - M(OLAP_ERR_FETCH_MEMORY_EXCEEDED, -608, "", true) \ - M(OLAP_ERR_READER_IS_UNINITIALIZED, -700, "", true) \ - M(OLAP_ERR_READER_GET_ITERATOR_ERROR, -701, "", true) \ - M(OLAP_ERR_CAPTURE_ROWSET_READER_ERROR, -702, "", true) \ - M(OLAP_ERR_READER_READING_ERROR, -703, "", true) \ - M(OLAP_ERR_READER_INITIALIZE_ERROR, -704, "", true) \ - M(OLAP_ERR_BE_VERSION_NOT_MATCH, -800, "", true) \ - M(OLAP_ERR_BE_REPLACE_VERSIONS_ERROR, -801, "", true) \ - M(OLAP_ERR_BE_MERGE_ERROR, -802, "", true) \ - M(OLAP_ERR_CAPTURE_ROWSET_ERROR, -804, "", true) \ - M(OLAP_ERR_BE_SAVE_HEADER_ERROR, -805, "", true) \ - M(OLAP_ERR_BE_INIT_OLAP_DATA, -806, "", true) \ - M(OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS, -807, "", true) \ - M(OLAP_ERR_BE_NO_SUITABLE_VERSION, -808, "", false) \ - M(OLAP_ERR_BE_TRY_BE_LOCK_ERROR, -809, "", true) \ - M(OLAP_ERR_BE_INVALID_NEED_MERGED_VERSIONS, -810, "", true) \ - M(OLAP_ERR_BE_ERROR_DELETE_ACTION, -811, "", true) \ - M(OLAP_ERR_BE_SEGMENTS_OVERLAPPING, -812, "", true) \ - M(OLAP_ERR_BE_CLONE_OCCURRED, -813, "", true) \ - M(OLAP_ERR_PUSH_INIT_ERROR, -900, "", true) \ - M(OLAP_ERR_PUSH_DELTA_FILE_EOF, -901, "", false) \ - M(OLAP_ERR_PUSH_VERSION_INCORRECT, -902, "", true) \ - M(OLAP_ERR_PUSH_SCHEMA_MISMATCH, -903, "", true) \ - M(OLAP_ERR_PUSH_CHECKSUM_ERROR, -904, "", true) \ - M(OLAP_ERR_PUSH_ACQUIRE_DATASOURCE_ERROR, -905, "", true) \ - M(OLAP_ERR_PUSH_CREAT_CUMULATIVE_ERROR, -906, "", true) \ - M(OLAP_ERR_PUSH_BUILD_DELTA_ERROR, -907, "", true) \ - M(OLAP_ERR_PUSH_VERSION_ALREADY_EXIST, -908, "", true) \ - M(OLAP_ERR_PUSH_TABLE_NOT_EXIST, -909, "", true) \ - M(OLAP_ERR_PUSH_INPUT_DATA_ERROR, -910, "", true) \ - M(OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST, -911, "", true) \ - M(OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED, -912, "", true) \ - M(OLAP_ERR_PUSH_COMMIT_ROWSET, -913, "", true) \ - M(OLAP_ERR_PUSH_ROWSET_NOT_FOUND, -914, "", true) \ - M(OLAP_ERR_INDEX_LOAD_ERROR, -1000, "", true) \ - M(OLAP_ERR_INDEX_EOF, -1001, "", false) \ - M(OLAP_ERR_INDEX_CHECKSUM_ERROR, -1002, "", true) \ - M(OLAP_ERR_INDEX_DELTA_PRUNING, -1003, "", true) \ - M(OLAP_ERR_DATA_ROW_BLOCK_ERROR, -1100, "", true) \ - M(OLAP_ERR_DATA_FILE_TYPE_ERROR, -1101, "", true) \ - M(OLAP_ERR_DATA_EOF, -1102, "", false) \ - M(OLAP_ERR_WRITER_INDEX_WRITE_ERROR, -1200, "", true) \ - M(OLAP_ERR_WRITER_DATA_WRITE_ERROR, -1201, "", true) \ - M(OLAP_ERR_WRITER_ROW_BLOCK_ERROR, -1202, "", true) \ - M(OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED, -1203, "", true) \ - M(OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR, -1300, "", true) \ - M(OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION, -1301, "", true) \ - M(OLAP_ERR_ROWBLOCK_READ_INFO_ERROR, -1302, "", true) \ - M(OLAP_ERR_HEADER_ADD_VERSION, -1400, "", true) \ - M(OLAP_ERR_HEADER_DELETE_VERSION, -1401, "", true) \ - M(OLAP_ERR_HEADER_ADD_PENDING_DELTA, -1402, "", true) \ - M(OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION, -1403, "", true) \ - M(OLAP_ERR_HEADER_INVALID_FLAG, -1404, "", true) \ - M(OLAP_ERR_HEADER_PUT, -1405, "", true) \ - M(OLAP_ERR_HEADER_DELETE, -1406, "", true) \ - M(OLAP_ERR_HEADER_GET, -1407, "", true) \ - M(OLAP_ERR_HEADER_LOAD_INVALID_KEY, -1408, "", true) \ - M(OLAP_ERR_HEADER_FLAG_PUT, -1409, "", true) \ - M(OLAP_ERR_HEADER_LOAD_JSON_HEADER, -1410, "", true) \ - M(OLAP_ERR_HEADER_INIT_FAILED, -1411, "", true) \ - M(OLAP_ERR_HEADER_PB_PARSE_FAILED, -1412, "", true) \ - M(OLAP_ERR_HEADER_HAS_PENDING_DATA, -1413, "", true) \ - M(OLAP_ERR_SCHEMA_SCHEMA_INVALID, -1500, "", true) \ - M(OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID, -1501, "", true) \ - M(OLAP_ERR_ALTER_MULTI_TABLE_ERR, -1600, "", true) \ - M(OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS, -1601, "", true) \ - M(OLAP_ERR_ALTER_STATUS_ERR, -1602, "", true) \ - M(OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED, -1603, "", true) \ - M(OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID, -1604, "", true) \ - M(OLAP_ERR_QUERY_SPLIT_KEY_ERR, -1605, "", true) \ - M(OLAP_ERR_DATA_QUALITY_ERR, -1606, "", true) \ - M(OLAP_ERR_COLUMN_DATA_LOAD_BLOCK, -1700, "", true) \ - M(OLAP_ERR_COLUMN_DATA_RECORD_INDEX, -1701, "", true) \ - M(OLAP_ERR_COLUMN_DATA_MAKE_FILE_HEADER, -1702, "", true) \ - M(OLAP_ERR_COLUMN_DATA_READ_VAR_INT, -1703, "", true) \ - M(OLAP_ERR_COLUMN_DATA_PATCH_LIST_NUM, -1704, "", true) \ - M(OLAP_ERR_COLUMN_STREAM_EOF, -1705, "", false) \ - M(OLAP_ERR_COLUMN_READ_STREAM, -1706, "", true) \ - M(OLAP_ERR_COLUMN_STREAM_NOT_EXIST, -1716, "", true) \ - M(OLAP_ERR_COLUMN_VALUE_NULL, -1717, "", true) \ - M(OLAP_ERR_COLUMN_SEEK_ERROR, -1719, "", true) \ - M(OLAP_ERR_DELETE_INVALID_CONDITION, -1900, "", true) \ - M(OLAP_ERR_DELETE_UPDATE_HEADER_FAILED, -1901, "", true) \ - M(OLAP_ERR_DELETE_SAVE_HEADER_FAILED, -1902, "", true) \ - M(OLAP_ERR_DELETE_INVALID_PARAMETERS, -1903, "", true) \ - M(OLAP_ERR_DELETE_INVALID_VERSION, -1904, "", true) \ - M(OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION, -2000, "", true) \ - M(OLAP_ERR_CUMULATIVE_REPEAT_INIT, -2001, "", true) \ - M(OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS, -2002, "", true) \ - M(OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE, -2003, "", true) \ +#define APPLY_FOR_ERROR_CODES(M) \ + M(OLAP_SUCCESS, 0, "", false) \ + M(OLAP_ERR_OTHER_ERROR, -1, "", true) \ + M(OLAP_REQUEST_FAILED, -2, "", true) \ + M(OLAP_ERR_OS_ERROR, -100, "", true) \ + M(OLAP_ERR_DIR_NOT_EXIST, -101, "", true) \ + M(OLAP_ERR_FILE_NOT_EXIST, -102, "", true) \ + M(OLAP_ERR_CREATE_FILE_ERROR, -103, "", true) \ + M(OLAP_ERR_MALLOC_ERROR, -104, "", true) \ + M(OLAP_ERR_STL_ERROR, -105, "", true) \ + M(OLAP_ERR_IO_ERROR, -106, "", true) \ + M(OLAP_ERR_MUTEX_ERROR, -107, "", true) \ + M(OLAP_ERR_PTHREAD_ERROR, -108, "", true) \ + M(OLAP_ERR_NETWORK_ERROR, -109, "", true) \ + M(OLAP_ERR_UB_FUNC_ERROR, -110, "", true) \ + M(OLAP_ERR_COMPRESS_ERROR, -111, "", true) \ + M(OLAP_ERR_DECOMPRESS_ERROR, -112, "", true) \ + M(OLAP_ERR_UNKNOWN_COMPRESSION_TYPE, -113, "", true) \ + M(OLAP_ERR_MMAP_ERROR, -114, "", true) \ + M(OLAP_ERR_RWLOCK_ERROR, -115, "", true) \ + M(OLAP_ERR_READ_UNENOUGH, -116, "", true) \ + M(OLAP_ERR_CANNOT_CREATE_DIR, -117, "", true) \ + M(OLAP_ERR_UB_NETWORK_ERROR, -118, "", true) \ + M(OLAP_ERR_FILE_FORMAT_ERROR, -119, "", true) \ + M(OLAP_ERR_EVAL_CONJUNCTS_ERROR, -120, "", true) \ + M(OLAP_ERR_COPY_FILE_ERROR, -121, "", true) \ + M(OLAP_ERR_FILE_ALREADY_EXIST, -122, "", true) \ + M(OLAP_ERR_NOT_INITED, -200, "", true) \ + M(OLAP_ERR_FUNC_NOT_IMPLEMENTED, -201, "", true) \ + M(OLAP_ERR_CALL_SEQUENCE_ERROR, -202, "", true) \ + M(OLAP_ERR_INPUT_PARAMETER_ERROR, -203, "", true) \ + M(OLAP_ERR_BUFFER_OVERFLOW, -204, "", true) \ + M(OLAP_ERR_CONFIG_ERROR, -205, "", true) \ + M(OLAP_ERR_INIT_FAILED, -206, "", true) \ + M(OLAP_ERR_INVALID_SCHEMA, -207, "", true) \ + M(OLAP_ERR_CHECKSUM_ERROR, -208, "", true) \ + M(OLAP_ERR_SIGNATURE_ERROR, -209, "", true) \ + M(OLAP_ERR_CATCH_EXCEPTION, -210, "", true) \ + M(OLAP_ERR_PARSE_PROTOBUF_ERROR, -211, "", true) \ + M(OLAP_ERR_SERIALIZE_PROTOBUF_ERROR, -212, "", true) \ + M(OLAP_ERR_WRITE_PROTOBUF_ERROR, -213, "", true) \ + M(OLAP_ERR_VERSION_NOT_EXIST, -214, "", true) \ + M(OLAP_ERR_TABLE_NOT_FOUND, -215, "", true) \ + M(OLAP_ERR_TRY_LOCK_FAILED, -216, "", true) \ + M(OLAP_ERR_OUT_OF_BOUND, -218, "", true) \ + M(OLAP_ERR_UNDERFLOW, -219, "", true) \ + M(OLAP_ERR_FILE_DATA_ERROR, -220, "", true) \ + M(OLAP_ERR_TEST_FILE_ERROR, -221, "", true) \ + M(OLAP_ERR_INVALID_ROOT_PATH, -222, "", true) \ + M(OLAP_ERR_NO_AVAILABLE_ROOT_PATH, -223, "", true) \ + M(OLAP_ERR_CHECK_LINES_ERROR, -224, "", true) \ + M(OLAP_ERR_INVALID_CLUSTER_INFO, -225, "", true) \ + M(OLAP_ERR_TRANSACTION_NOT_EXIST, -226, "", true) \ + M(OLAP_ERR_DISK_FAILURE, -227, "", true) \ + M(OLAP_ERR_TRANSACTION_ALREADY_COMMITTED, -228, "", true) \ + M(OLAP_ERR_TRANSACTION_ALREADY_VISIBLE, -229, "", true) \ + M(OLAP_ERR_VERSION_ALREADY_MERGED, -230, "", true) \ + M(OLAP_ERR_LZO_DISABLED, -231, "", true) \ + M(OLAP_ERR_DISK_REACH_CAPACITY_LIMIT, -232, "", true) \ + M(OLAP_ERR_TOO_MANY_TRANSACTIONS, -233, "", true) \ + M(OLAP_ERR_INVALID_SNAPSHOT_VERSION, -234, "", true) \ + M(OLAP_ERR_TOO_MANY_VERSION, -235, "", true) \ + M(OLAP_ERR_NOT_INITIALIZED, -236, "", true) \ + M(OLAP_ERR_ALREADY_CANCELLED, -237, "", true) \ + M(OLAP_ERR_TOO_MANY_SEGMENTS, -238, "", true) \ + M(OLAP_ERR_CE_CMD_PARAMS_ERROR, -300, "", true) \ + M(OLAP_ERR_CE_BUFFER_TOO_SMALL, -301, "", true) \ + M(OLAP_ERR_CE_CMD_NOT_VALID, -302, "", true) \ + M(OLAP_ERR_CE_LOAD_TABLE_ERROR, -303, "", true) \ + M(OLAP_ERR_CE_NOT_FINISHED, -304, "", true) \ + M(OLAP_ERR_CE_TABLET_ID_EXIST, -305, "", true) \ + M(OLAP_ERR_CE_TRY_CE_LOCK_ERROR, -306, "", true) \ + M(OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR, -400, "", true) \ + M(OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR, -401, "", true) \ + M(OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR, -402, "", true) \ + M(OLAP_ERR_TABLE_INDEX_FIND_ERROR, -403, "", true) \ + M(OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR, -404, "", true) \ + M(OLAP_ERR_TABLE_CREATE_META_ERROR, -405, "", true) \ + M(OLAP_ERR_TABLE_ALREADY_DELETED_ERROR, -406, "", true) \ + M(OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE, -500, "", true) \ + M(OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE, -501, "", true) \ + M(OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR, -502, "", true) \ + M(OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR, -503, "", true) \ + M(OLAP_ERR_DELETE_VERSION_ERROR, -504, "", true) \ + M(OLAP_ERR_GC_SCAN_PATH_ERROR, -505, "", true) \ + M(OLAP_ERR_ENGINE_INSERT_OLD_TABLET, -506, "", true) \ + M(OLAP_ERR_FETCH_OTHER_ERROR, -600, "", true) \ + M(OLAP_ERR_FETCH_TABLE_NOT_EXIST, -601, "", true) \ + M(OLAP_ERR_FETCH_VERSION_ERROR, -602, "", true) \ + M(OLAP_ERR_FETCH_SCHEMA_ERROR, -603, "", true) \ + M(OLAP_ERR_FETCH_COMPRESSION_ERROR, -604, "", true) \ + M(OLAP_ERR_FETCH_CONTEXT_NOT_EXIST, -605, "", true) \ + M(OLAP_ERR_FETCH_GET_READER_PARAMS_ERR, -606, "", true) \ + M(OLAP_ERR_FETCH_SAVE_SESSION_ERR, -607, "", true) \ + M(OLAP_ERR_FETCH_MEMORY_EXCEEDED, -608, "", true) \ + M(OLAP_ERR_READER_IS_UNINITIALIZED, -700, "", true) \ + M(OLAP_ERR_READER_GET_ITERATOR_ERROR, -701, "", true) \ + M(OLAP_ERR_CAPTURE_ROWSET_READER_ERROR, -702, "", true) \ + M(OLAP_ERR_READER_READING_ERROR, -703, "", true) \ + M(OLAP_ERR_READER_INITIALIZE_ERROR, -704, "", true) \ + M(OLAP_ERR_BE_VERSION_NOT_MATCH, -800, "", true) \ + M(OLAP_ERR_BE_REPLACE_VERSIONS_ERROR, -801, "", true) \ + M(OLAP_ERR_BE_MERGE_ERROR, -802, "", true) \ + M(OLAP_ERR_CAPTURE_ROWSET_ERROR, -804, "", true) \ + M(OLAP_ERR_BE_SAVE_HEADER_ERROR, -805, "", true) \ + M(OLAP_ERR_BE_INIT_OLAP_DATA, -806, "", true) \ + M(OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS, -807, "", true) \ + M(OLAP_ERR_BE_NO_SUITABLE_VERSION, -808, "", false) \ + M(OLAP_ERR_BE_TRY_BE_LOCK_ERROR, -809, "", true) \ + M(OLAP_ERR_BE_INVALID_NEED_MERGED_VERSIONS, -810, "", true) \ + M(OLAP_ERR_BE_ERROR_DELETE_ACTION, -811, "", true) \ + M(OLAP_ERR_BE_SEGMENTS_OVERLAPPING, -812, "", true) \ + M(OLAP_ERR_BE_CLONE_OCCURRED, -813, "", true) \ + M(OLAP_ERR_PUSH_INIT_ERROR, -900, "", true) \ + M(OLAP_ERR_PUSH_DELTA_FILE_EOF, -901, "", false) \ + M(OLAP_ERR_PUSH_VERSION_INCORRECT, -902, "", true) \ + M(OLAP_ERR_PUSH_SCHEMA_MISMATCH, -903, "", true) \ + M(OLAP_ERR_PUSH_CHECKSUM_ERROR, -904, "", true) \ + M(OLAP_ERR_PUSH_ACQUIRE_DATASOURCE_ERROR, -905, "", true) \ + M(OLAP_ERR_PUSH_CREAT_CUMULATIVE_ERROR, -906, "", true) \ + M(OLAP_ERR_PUSH_BUILD_DELTA_ERROR, -907, "", true) \ + M(OLAP_ERR_PUSH_VERSION_ALREADY_EXIST, -908, "", true) \ + M(OLAP_ERR_PUSH_TABLE_NOT_EXIST, -909, "", true) \ + M(OLAP_ERR_PUSH_INPUT_DATA_ERROR, -910, "", true) \ + M(OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST, -911, "", true) \ + M(OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED, -912, "", true) \ + M(OLAP_ERR_PUSH_COMMIT_ROWSET, -913, "", true) \ + M(OLAP_ERR_PUSH_ROWSET_NOT_FOUND, -914, "", true) \ + M(OLAP_ERR_INDEX_LOAD_ERROR, -1000, "", true) \ + M(OLAP_ERR_INDEX_EOF, -1001, "", false) \ + M(OLAP_ERR_INDEX_CHECKSUM_ERROR, -1002, "", true) \ + M(OLAP_ERR_INDEX_DELTA_PRUNING, -1003, "", true) \ + M(OLAP_ERR_DATA_ROW_BLOCK_ERROR, -1100, "", true) \ + M(OLAP_ERR_DATA_FILE_TYPE_ERROR, -1101, "", true) \ + M(OLAP_ERR_DATA_EOF, -1102, "", false) \ + M(OLAP_ERR_WRITER_INDEX_WRITE_ERROR, -1200, "", true) \ + M(OLAP_ERR_WRITER_DATA_WRITE_ERROR, -1201, "", true) \ + M(OLAP_ERR_WRITER_ROW_BLOCK_ERROR, -1202, "", true) \ + M(OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED, -1203, "", true) \ + M(OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR, -1300, "", true) \ + M(OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION, -1301, "", true) \ + M(OLAP_ERR_ROWBLOCK_READ_INFO_ERROR, -1302, "", true) \ + M(OLAP_ERR_HEADER_ADD_VERSION, -1400, "", true) \ + M(OLAP_ERR_HEADER_DELETE_VERSION, -1401, "", true) \ + M(OLAP_ERR_HEADER_ADD_PENDING_DELTA, -1402, "", true) \ + M(OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION, -1403, "", true) \ + M(OLAP_ERR_HEADER_INVALID_FLAG, -1404, "", true) \ + M(OLAP_ERR_HEADER_PUT, -1405, "", true) \ + M(OLAP_ERR_HEADER_DELETE, -1406, "", true) \ + M(OLAP_ERR_HEADER_GET, -1407, "", true) \ + M(OLAP_ERR_HEADER_LOAD_INVALID_KEY, -1408, "", true) \ + M(OLAP_ERR_HEADER_FLAG_PUT, -1409, "", true) \ + M(OLAP_ERR_HEADER_LOAD_JSON_HEADER, -1410, "", true) \ + M(OLAP_ERR_HEADER_INIT_FAILED, -1411, "", true) \ + M(OLAP_ERR_HEADER_PB_PARSE_FAILED, -1412, "", true) \ + M(OLAP_ERR_HEADER_HAS_PENDING_DATA, -1413, "", true) \ + M(OLAP_ERR_SCHEMA_SCHEMA_INVALID, -1500, "", true) \ + M(OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID, -1501, "", true) \ + M(OLAP_ERR_ALTER_MULTI_TABLE_ERR, -1600, "", true) \ + M(OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS, -1601, "", true) \ + M(OLAP_ERR_ALTER_STATUS_ERR, -1602, "", true) \ + M(OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED, -1603, "", true) \ + M(OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID, -1604, "", true) \ + M(OLAP_ERR_QUERY_SPLIT_KEY_ERR, -1605, "", true) \ + M(OLAP_ERR_DATA_QUALITY_ERR, -1606, "", true) \ + M(OLAP_ERR_COLUMN_DATA_LOAD_BLOCK, -1700, "", true) \ + M(OLAP_ERR_COLUMN_DATA_RECORD_INDEX, -1701, "", true) \ + M(OLAP_ERR_COLUMN_DATA_MAKE_FILE_HEADER, -1702, "", true) \ + M(OLAP_ERR_COLUMN_DATA_READ_VAR_INT, -1703, "", true) \ + M(OLAP_ERR_COLUMN_DATA_PATCH_LIST_NUM, -1704, "", true) \ + M(OLAP_ERR_COLUMN_STREAM_EOF, -1705, "", false) \ + M(OLAP_ERR_COLUMN_READ_STREAM, -1706, "", true) \ + M(OLAP_ERR_COLUMN_STREAM_NOT_EXIST, -1716, "", true) \ + M(OLAP_ERR_COLUMN_VALUE_NULL, -1717, "", true) \ + M(OLAP_ERR_COLUMN_SEEK_ERROR, -1719, "", true) \ + M(OLAP_ERR_DELETE_INVALID_CONDITION, -1900, "", true) \ + M(OLAP_ERR_DELETE_UPDATE_HEADER_FAILED, -1901, "", true) \ + M(OLAP_ERR_DELETE_SAVE_HEADER_FAILED, -1902, "", true) \ + M(OLAP_ERR_DELETE_INVALID_PARAMETERS, -1903, "", true) \ + M(OLAP_ERR_DELETE_INVALID_VERSION, -1904, "", true) \ + M(OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION, -2000, "", true) \ + M(OLAP_ERR_CUMULATIVE_REPEAT_INIT, -2001, "", true) \ + M(OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS, -2002, "", true) \ + M(OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE, -2003, "", true) \ M(OLAP_ERR_CUMULATIVE_INVALID_NEED_MERGED_VERSIONS, -2004, "", true) \ - M(OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION, -2005, "", true) \ - M(OLAP_ERR_CUMULATIVE_MISS_VERSION, -2006, "", true) \ - M(OLAP_ERR_CUMULATIVE_CLONE_OCCURRED, -2007, "", true) \ - M(OLAP_ERR_META_INVALID_ARGUMENT, -3000, "", true) \ - M(OLAP_ERR_META_OPEN_DB, -3001, "", true) \ - M(OLAP_ERR_META_KEY_NOT_FOUND, -3002, "", true) \ - M(OLAP_ERR_META_GET, -3003, "", true) \ - M(OLAP_ERR_META_PUT, -3004, "", true) \ - M(OLAP_ERR_META_ITERATOR, -3005, "", true) \ - M(OLAP_ERR_META_DELETE, -3006, "", true) \ - M(OLAP_ERR_META_ALREADY_EXIST, -3007, "", true) \ - M(OLAP_ERR_ROWSET_WRITER_INIT, -3100, "", true) \ - M(OLAP_ERR_ROWSET_SAVE_FAILED, -3101, "", true) \ - M(OLAP_ERR_ROWSET_GENERATE_ID_FAILED, -3102, "", true) \ - M(OLAP_ERR_ROWSET_DELETE_FILE_FAILED, -3103, "", true) \ - M(OLAP_ERR_ROWSET_BUILDER_INIT, -3104, "", true) \ - M(OLAP_ERR_ROWSET_TYPE_NOT_FOUND, -3105, "", true) \ - M(OLAP_ERR_ROWSET_ALREADY_EXIST, -3106, "", true) \ - M(OLAP_ERR_ROWSET_CREATE_READER, -3107, "", true) \ - M(OLAP_ERR_ROWSET_INVALID, -3108, "", true) \ - M(OLAP_ERR_ROWSET_LOAD_FAILED, -3109, "", true) \ - M(OLAP_ERR_ROWSET_READER_INIT, -3110, "", true) \ - M(OLAP_ERR_ROWSET_READ_FAILED, -3111, "", true) \ - M(OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION, -3112, "", true) \ - M(OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE, -3113, "", true) \ - M(OLAP_ERR_ROWSET_ADD_MIGRATION_V2, -3114, "", true) \ + M(OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION, -2005, "", true) \ + M(OLAP_ERR_CUMULATIVE_MISS_VERSION, -2006, "", true) \ + M(OLAP_ERR_CUMULATIVE_CLONE_OCCURRED, -2007, "", true) \ + M(OLAP_ERR_META_INVALID_ARGUMENT, -3000, "", true) \ + M(OLAP_ERR_META_OPEN_DB, -3001, "", true) \ + M(OLAP_ERR_META_KEY_NOT_FOUND, -3002, "", true) \ + M(OLAP_ERR_META_GET, -3003, "", true) \ + M(OLAP_ERR_META_PUT, -3004, "", true) \ + M(OLAP_ERR_META_ITERATOR, -3005, "", true) \ + M(OLAP_ERR_META_DELETE, -3006, "", true) \ + M(OLAP_ERR_META_ALREADY_EXIST, -3007, "", true) \ + M(OLAP_ERR_ROWSET_WRITER_INIT, -3100, "", true) \ + M(OLAP_ERR_ROWSET_SAVE_FAILED, -3101, "", true) \ + M(OLAP_ERR_ROWSET_GENERATE_ID_FAILED, -3102, "", true) \ + M(OLAP_ERR_ROWSET_DELETE_FILE_FAILED, -3103, "", true) \ + M(OLAP_ERR_ROWSET_BUILDER_INIT, -3104, "", true) \ + M(OLAP_ERR_ROWSET_TYPE_NOT_FOUND, -3105, "", true) \ + M(OLAP_ERR_ROWSET_ALREADY_EXIST, -3106, "", true) \ + M(OLAP_ERR_ROWSET_CREATE_READER, -3107, "", true) \ + M(OLAP_ERR_ROWSET_INVALID, -3108, "", true) \ + M(OLAP_ERR_ROWSET_LOAD_FAILED, -3109, "", true) \ + M(OLAP_ERR_ROWSET_READER_INIT, -3110, "", true) \ + M(OLAP_ERR_ROWSET_READ_FAILED, -3111, "", true) \ + M(OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION, -3112, "", true) \ + M(OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE, -3113, "", true) \ + M(OLAP_ERR_ROWSET_ADD_MIGRATION_V2, -3114, "", true) enum ErrorCode { #define M(NAME, ERRORCODE, DESC, STACKTRACEENABLED) NAME = ERRORCODE, @@ -240,24 +240,21 @@ enum ErrorCode { }; class Status { - enum { + enum { // If the error and log returned by the query are truncated, the status to string may be too long. STATE_CAPACITY = 2048, HEADER_LEN = 7, MESSAGE_LEN = STATE_CAPACITY - HEADER_LEN }; + public: Status() : _length(0) {} // copy c'tor makes copy of error detail so Status can be returned by value - Status(const Status& rhs) { - *this = rhs; - } + Status(const Status& rhs) { *this = rhs; } // move c'tor - Status(Status&& rhs) { - *this = rhs; - } + Status(Status&& rhs) { *this = rhs; } // same as copy c'tor Status& operator=(const Status& rhs) { @@ -375,14 +372,14 @@ public: return Status(TStatusCode::DATA_QUALITY_ERROR, msg, precise_code, msg2); } - template - static Status OLAPInternalError(int16_t precise_code, const std::string & fmt, Args&&... args) { + template + static Status OLAPInternalError(int16_t precise_code, const std::string& fmt, Args&&... args) { return ConstructErrorStatus(precise_code, fmt::format(fmt, std::forward(args)...)); } // A wrapper for ErrorCode // Precise code is for ErrorCode's enum value - // All Status Error is treated as Internal Error + // All Status Error is treated as Internal Error static Status OLAPInternalError(int16_t precise_code) { return ConstructErrorStatus(precise_code, Slice()); } @@ -453,9 +450,7 @@ public: return ok() ? TStatusCode::OK : static_cast(_code); } - int16_t precise_code() const { - return ok() ? 0 : _precise_code; - } + int16_t precise_code() const { return ok() ? 0 : _precise_code; } /// Clone this status and add the specified prefix to the message. /// @@ -483,13 +478,18 @@ public: // Used like if (res == Status::OK()) // if the state is ok, then both code and precise code is not initialized properly, so that should check ok state // ignore error messages during comparison - bool operator == (const Status& st) { return ok() ? st.ok() : code() == st.code() && precise_code() == st.precise_code(); } + bool operator==(const Status& st) { + return ok() ? st.ok() : code() == st.code() && precise_code() == st.precise_code(); + } // Used like if (res != Status::OK()) - bool operator != (const Status& st) { return ok() ? !st.ok() : code() != st.code() || precise_code() != st.precise_code(); } + bool operator!=(const Status& st) { + return ok() ? !st.ok() : code() != st.code() || precise_code() != st.precise_code(); + } private: - void assemble_state(TStatusCode::type code, const Slice& msg, int16_t precise_code, const Slice& msg2) { + void assemble_state(TStatusCode::type code, const Slice& msg, int16_t precise_code, + const Slice& msg2) { DCHECK(code != TStatusCode::OK); uint32_t len1 = msg.size; uint32_t len2 = msg2.size; @@ -513,13 +513,13 @@ private: _precise_code = precise_code; // copy msg - char* result = _state + HEADER_LEN; + char* result = _state + HEADER_LEN; uint32_t len = std::min(len1, MESSAGE_LEN); memcpy(result, msg.data, len); // copy msg2 if (len2 > 0 && len < MESSAGE_LEN - 2) { - result[len++] = ':'; + result[len++] = ':'; result[len++] = ' '; memcpy(&result[len], msg2.data, std::min(len2, MESSAGE_LEN - len)); } @@ -543,18 +543,17 @@ private: // Message length == HEADER(7 bytes) + message size // Sometimes error message is empty, so that we could not use length==0 to indicate // whether there is error happens - int64_t _length : 32; - int64_t _code : 8; + int64_t _length : 32; + int64_t _code : 8; int64_t _precise_code : 16; - int64_t _message : 8; // save message since here + int64_t _message : 8; // save message since here }; }; }; // Override the << operator, it is used during LOG(INFO) << "xxxx" << status; // Add inline here to dedup many includes -inline std::ostream & operator << (std::ostream & ostr, const Status & param) -{ +inline std::ostream& operator<<(std::ostream& ostr, const Status& param) { return ostr << param.to_string(); } diff --git a/be/src/common/utils.h b/be/src/common/utils.h index 91eb4427c9..8bfdf51810 100644 --- a/be/src/common/utils.h +++ b/be/src/common/utils.h @@ -22,7 +22,7 @@ namespace doris { #ifndef ARRAY_SIZE -#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) #endif struct AuthInfo { @@ -57,7 +57,7 @@ void set_request_auth(T* req, const AuthInfo& auth) { } } -// This is the threshold used to periodically release the memory occupied by the expression. +// This is the threshold used to periodically release the memory occupied by the expression. // RELEASE_CONTEXT_COUNTER should be power of 2 // GCC will optimize the modulo operation to &(release_context_counter - 1) // _conjunct_ctxs will free local alloc after this probe calculations @@ -65,7 +65,7 @@ static constexpr int RELEASE_CONTEXT_COUNTER = 1 << 7; static_assert((RELEASE_CONTEXT_COUNTER & (RELEASE_CONTEXT_COUNTER - 1)) == 0, "should be power of 2"); -template +template static inline To convert_to(From from) { union { From _from; diff --git a/be/src/env/env.cpp b/be/src/env/env.cpp index 896e58642f..2503f1cde4 100644 --- a/be/src/env/env.cpp +++ b/be/src/env/env.cpp @@ -24,7 +24,7 @@ namespace doris { std::shared_ptr Env::_posix_env(new PosixEnv()); // Default Posix Env -Env *Env::Default() { +Env* Env::Default() { return _posix_env.get(); } diff --git a/be/src/env/env.h b/be/src/env/env.h index aeafc1e226..e8d44917b0 100644 --- a/be/src/env/env.h +++ b/be/src/env/env.h @@ -202,11 +202,10 @@ struct FilePathDesc { // REMOTE_CACHE is the local cache path for remote path, if a data_dir is REMOTE_CACHE, // it means the tablet in it will be set as a remote path. static bool is_remote(TStorageMedium::type checked_storage_medium) { - return checked_storage_medium == TStorageMedium::S3 || checked_storage_medium == TStorageMedium::REMOTE_CACHE; - } - bool is_remote() const { - return is_remote(storage_medium); + return checked_storage_medium == TStorageMedium::S3 || + checked_storage_medium == TStorageMedium::REMOTE_CACHE; } + bool is_remote() const { return is_remote(storage_medium); } }; class FilePathDescStream { diff --git a/be/src/env/env_posix.h b/be/src/env/env_posix.h index 3b2681d478..876e2c16f1 100644 --- a/be/src/env/env_posix.h +++ b/be/src/env/env_posix.h @@ -87,7 +87,6 @@ public: Status link_file(const std::string& old_path, const std::string& new_path) override; Status get_space_info(const std::string& path, int64_t* capacity, int64_t* available) override; - }; } // namespace doris \ No newline at end of file diff --git a/be/src/exec/assert_num_rows_node.h b/be/src/exec/assert_num_rows_node.h index 8141147403..2e766f07c4 100644 --- a/be/src/exec/assert_num_rows_node.h +++ b/be/src/exec/assert_num_rows_node.h @@ -24,7 +24,7 @@ namespace doris { class AssertNumRowsNode : public ExecNode { public: AssertNumRowsNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - virtual ~AssertNumRowsNode(){}; + virtual ~AssertNumRowsNode() {}; virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); virtual Status prepare(RuntimeState* state); diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index 796defc6d0..ca5b08831f 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -57,7 +57,8 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, _read_timer(nullptr), _materialize_timer(nullptr), _success(false), - _scanner_eof(false) {} + _scanner_eof(false) { +} Status BaseScanner::open() { RETURN_IF_ERROR(init_expr_ctxes()); diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index 338818914b..8b97a97a3f 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -36,7 +36,7 @@ class ExprContext; namespace vectorized { class IColumn; using MutableColumnPtr = IColumn::MutablePtr; -} +} // namespace vectorized // The counter will be passed to each scanner. // Note that this struct is not thread safe. @@ -59,7 +59,7 @@ public: virtual Status open(); // Get next tuple - virtual Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool *fill_tuple) = 0; + virtual Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) = 0; // Get next block virtual Status get_next(std::vector& columns, bool* eof) { @@ -74,7 +74,7 @@ public: const std::vector& columns_from_path); void free_expr_local_allocations(); - + protected: RuntimeState* _state; const TBrokerScanRangeParams& _params; diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index ce93dc2c5c..d88ce56ecf 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -33,7 +33,9 @@ namespace doris { BlockingJoinNode::BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), _node_name(node_name), _join_op(join_op), + : ExecNode(pool, tnode, descs), + _node_name(node_name), + _join_op(join_op), _left_side_eos(false) {} Status BlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp index ce450745a7..4ae286e944 100644 --- a/be/src/exec/broker_scan_node.cpp +++ b/be/src/exec/broker_scan_node.cpp @@ -229,19 +229,19 @@ std::unique_ptr BrokerScanNode::create_scanner(const TBrokerScanRan break; case TFileFormatType::FORMAT_ORC: scan = new ORCScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); + scan_range.ranges, scan_range.broker_addresses, _pre_filter_texprs, + counter); break; case TFileFormatType::FORMAT_JSON: scan = new JsonScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); + scan_range.ranges, scan_range.broker_addresses, _pre_filter_texprs, + counter); break; default: if (_vectorized) { - scan = new vectorized::VBrokerScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); + scan = new vectorized::VBrokerScanner( + _runtime_state, runtime_profile(), scan_range.params, scan_range.ranges, + scan_range.broker_addresses, _pre_filter_texprs, counter); } else { scan = new BrokerScanner(_runtime_state, runtime_profile(), scan_range.params, scan_range.ranges, scan_range.broker_addresses, @@ -262,8 +262,7 @@ Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, while (!scanner_eof) { // Fill one row batch - std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size())); + std::shared_ptr row_batch(new RowBatch(row_desc(), _runtime_state->batch_size())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); @@ -282,7 +281,7 @@ Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, } // This row batch has been filled up, and break this - if (row_batch->is_full() || row_batch->is_full_uncommited() ) { + if (row_batch->is_full() || row_batch->is_full_uncommited()) { break; } @@ -299,7 +298,7 @@ Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, continue; } - // if read row succeed, but fill dest tuple fail, we need to increase # of uncommitted rows, + // if read row succeed, but fill dest tuple fail, we need to increase # of uncommitted rows, // once reach the capacity of row batch, will transfer the row batch to next operator to release memory if (!tuple_fill) { row_batch->increase_uncommitted_rows(); @@ -393,4 +392,4 @@ void BrokerScanNode::scanner_worker(int start_idx, int length) { Expr::close(scanner_expr_ctxs, _runtime_state); } -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index f91a685c44..cda19d8611 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -46,8 +46,7 @@ BrokerScanner::BrokerScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, - ScannerCounter* counter) + const std::vector& pre_filter_texprs, ScannerCounter* counter) : BaseScanner(state, profile, params, pre_filter_texprs, counter), _ranges(ranges), _broker_addresses(broker_addresses), @@ -457,7 +456,8 @@ bool is_null(const Slice& slice) { } // Convert one row to this tuple -Status BrokerScanner::_convert_one_row(const Slice& line, Tuple* tuple, MemPool* tuple_pool, bool* fill_tuple) { +Status BrokerScanner::_convert_one_row(const Slice& line, Tuple* tuple, MemPool* tuple_pool, + bool* fill_tuple) { RETURN_IF_ERROR(_line_to_src_tuple(line)); if (!_success) { // If not success, which means we met an invalid row, return. @@ -472,12 +472,14 @@ Status BrokerScanner::_convert_one_row(const Slice& line, Tuple* tuple, MemPool* Status BrokerScanner::_line_to_src_tuple(const Slice& line) { bool is_proto_format = _file_format_type == TFileFormatType::FORMAT_PROTO; if (!is_proto_format && !validate_utf8(line.data, line.size)) { - RETURN_IF_ERROR(_state->append_error_msg_to_file([]() -> std::string { return "Unable to display"; }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + []() -> std::string { return "Unable to display"; }, []() -> std::string { fmt::memory_buffer error_msg; fmt::format_to(error_msg, "{}", "Unable to display"); return fmt::to_string(error_msg); - }, &_scanner_eof)); + }, + &_scanner_eof)); _counter->num_rows_filtered++; _success = false; return Status::OK(); @@ -494,7 +496,7 @@ Status BrokerScanner::_line_to_src_tuple(const Slice& line) { const std::vector& columns_from_path = range.columns_from_path; // read data by column defination, resize _split_values to _src_solt_size if (read_by_column_def) { - // fill slots by NULL + // fill slots by NULL while (_split_values.size() + columns_from_path.size() < _src_slot_descs.size()) { _split_values.emplace_back(_split_values.back().get_data(), 0); } @@ -505,27 +507,39 @@ Status BrokerScanner::_line_to_src_tuple(const Slice& line) { } else { if (_split_values.size() + columns_from_path.size() < _src_slot_descs.size()) { RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return is_proto_format ? "" : std::string(line.data, line.size); }, - [&]() -> std::string { + [&]() -> std::string { + return is_proto_format ? "" : std::string(line.data, line.size); + }, + [&]() -> std::string { fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "actual column number is less than schema column number."); - fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", _split_values.size(), _value_separator); - fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", _line_delimiter, _src_slot_descs.size()); + fmt::format_to(error_msg, "{}", + "actual column number is less than schema column number."); + fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", + _split_values.size(), _value_separator); + fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", + _line_delimiter, _src_slot_descs.size()); return fmt::to_string(error_msg); - }, &_scanner_eof)); + }, + &_scanner_eof)); _counter->num_rows_filtered++; _success = false; return Status::OK(); } else if (_split_values.size() + columns_from_path.size() > _src_slot_descs.size()) { RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return is_proto_format ? "" : std::string(line.data, line.size); }, - [&]() -> std::string { + [&]() -> std::string { + return is_proto_format ? "" : std::string(line.data, line.size); + }, + [&]() -> std::string { fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "actual column number is more than schema column number."); - fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", _split_values.size(), _value_separator); - fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", _line_delimiter, _src_slot_descs.size()); + fmt::format_to(error_msg, "{}", + "actual column number is more than schema column number."); + fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", + _split_values.size(), _value_separator); + fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", + _line_delimiter, _src_slot_descs.size()); return fmt::to_string(error_msg); - }, &_scanner_eof)); + }, + &_scanner_eof)); _counter->num_rows_filtered++; _success = false; return Status::OK(); diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index f94056036d..e9b6eefa26 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -83,7 +83,8 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink // TODO: figure out good buffer size based on size of output row if (is_vec) { - tmp_sink = new doris::vectorized::VResultSink(row_desc, output_exprs, thrift_sink.result_sink, 4096); + tmp_sink = new doris::vectorized::VResultSink(row_desc, output_exprs, + thrift_sink.result_sink, 4096); } else { tmp_sink = new ResultSink(row_desc, output_exprs, thrift_sink.result_sink, 1024); } @@ -119,7 +120,8 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink return Status::InternalError("Missing data buffer sink."); } if (is_vec) { - doris::vectorized::VMysqlTableSink* vmysql_tbl_sink = new doris::vectorized::VMysqlTableSink(pool, row_desc, output_exprs); + doris::vectorized::VMysqlTableSink* vmysql_tbl_sink = + new doris::vectorized::VMysqlTableSink(pool, row_desc, output_exprs); sink->reset(vmysql_tbl_sink); } else { // TODO: figure out good buffer size based on size of output row diff --git a/be/src/exec/decompressor.h b/be/src/exec/decompressor.h index e53ebd5711..0b10b874fc 100644 --- a/be/src/exec/decompressor.h +++ b/be/src/exec/decompressor.h @@ -172,9 +172,7 @@ private: return ptr + sizeof(uint32_t); } - LzoChecksum header_type(int flags) { - return (flags & F_H_CRC32) ? CHECK_CRC32 : CHECK_ADLER; - } + LzoChecksum header_type(int flags) { return (flags & F_H_CRC32) ? CHECK_CRC32 : CHECK_ADLER; } LzoChecksum input_type(int flags) { return (flags & F_CRC32_C) ? CHECK_CRC32 : (flags & F_ADLER32_C) ? CHECK_ADLER : CHECK_NONE; diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 1f8ecabed6..dbb00939b8 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -412,7 +412,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, } size_t val_size = val.length(); Status rst; - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size, &rst)); + char* buffer = + reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size, &rst)); if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute( ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); diff --git a/be/src/exec/es_scan_node.cpp b/be/src/exec/es_scan_node.cpp index 5f7ddb1b53..a0a89d3a68 100644 --- a/be/src/exec/es_scan_node.cpp +++ b/be/src/exec/es_scan_node.cpp @@ -775,7 +775,8 @@ Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple, const string& val = col.string_vals[val_idx]; size_t val_size = val.size(); Status rst; - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size, &rst)); + char* buffer = + reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size, &rst)); if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute( ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); diff --git a/be/src/exec/hdfs_reader_writer.cpp b/be/src/exec/hdfs_reader_writer.cpp index 86109c3024..956fc9e40e 100644 --- a/be/src/exec/hdfs_reader_writer.cpp +++ b/be/src/exec/hdfs_reader_writer.cpp @@ -24,10 +24,8 @@ namespace doris { -Status HdfsReaderWriter::create_reader(const THdfsParams& hdfs_params, - const std::string& path, - int64_t start_offset, - FileReader** reader) { +Status HdfsReaderWriter::create_reader(const THdfsParams& hdfs_params, const std::string& path, + int64_t start_offset, FileReader** reader) { #if defined(__x86_64__) *reader = new HdfsFileReader(hdfs_params, path, start_offset); return Status::OK(); @@ -37,15 +35,13 @@ Status HdfsReaderWriter::create_reader(const THdfsParams& hdfs_params, } Status HdfsReaderWriter::create_writer(std::map& properties, - const std::string& path, - FileWriter** writer) { + const std::string& path, FileWriter** writer) { #if defined(__x86_64__) *writer = new HDFSWriter(properties, path); return Status::OK(); #else return Status::InternalError("HdfsWriter do not support on non x86 platform"); #endif - } } // namespace doris diff --git a/be/src/exec/hdfs_reader_writer.h b/be/src/exec/hdfs_reader_writer.h index 996171ae1a..e0decf2035 100644 --- a/be/src/exec/hdfs_reader_writer.h +++ b/be/src/exec/hdfs_reader_writer.h @@ -28,17 +28,14 @@ namespace doris { // we use this class to shield the upper layer from the need to deal with the platform environment // when creating a raeder or writer. // -// If in the arm64 environment, creating a reader or writer through this class will return an error. +// If in the arm64 environment, creating a reader or writer through this class will return an error. class HdfsReaderWriter { public: - static Status create_reader(const THdfsParams& hdfs_params, - const std::string& path, - int64_t start_offset, - FileReader** reader); + static Status create_reader(const THdfsParams& hdfs_params, const std::string& path, + int64_t start_offset, FileReader** reader); static Status create_writer(std::map& properties, - const std::string& path, - FileWriter** writer); + const std::string& path, FileWriter** writer); }; } // namespace doris diff --git a/be/src/exec/hdfs_writer.cpp b/be/src/exec/hdfs_writer.cpp index b9ab1a169e..a362488d8b 100644 --- a/be/src/exec/hdfs_writer.cpp +++ b/be/src/exec/hdfs_writer.cpp @@ -30,9 +30,7 @@ const static std::string KERB_TICKET_CACHE_PATH = "kerb_ticket_cache_path"; const static std::string TOKEN = "token"; HDFSWriter::HDFSWriter(std::map& properties, const std::string& path) - : _properties(properties), - _path(path), - _hdfs_fs(nullptr) { + : _properties(properties), _path(path), _hdfs_fs(nullptr) { _parse_properties(_properties); } @@ -59,9 +57,10 @@ Status HDFSWriter::open() { int ret = hdfsCreateDirectory(_hdfs_fs, hdfs_dir.c_str()); if (ret != 0) { std::stringstream ss; - ss << "create dir failed. " << "(BE: " << BackendOptions::get_localhost() << ")" - << " namenode: " << _namenode << " path: " << hdfs_dir - << ", err: " << strerror(errno); + ss << "create dir failed. " + << "(BE: " << BackendOptions::get_localhost() << ")" + << " namenode: " << _namenode << " path: " << hdfs_dir + << ", err: " << strerror(errno); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -70,9 +69,9 @@ Status HDFSWriter::open() { _hdfs_file = hdfsOpenFile(_hdfs_fs, _path.c_str(), O_WRONLY, 0, 0, 0); if (_hdfs_file == nullptr) { std::stringstream ss; - ss << "open file failed. " << "(BE: " << BackendOptions::get_localhost() << ")" - << " namenode:" << _namenode << " path:" << _path - << ", err: " << strerror(errno); + ss << "open file failed. " + << "(BE: " << BackendOptions::get_localhost() << ")" + << " namenode:" << _namenode << " path:" << _path << ", err: " << strerror(errno); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -88,14 +87,14 @@ Status HDFSWriter::write(const uint8_t* buf, size_t buf_len, size_t* written_len int32_t result = hdfsWrite(_hdfs_fs, _hdfs_file, buf, buf_len); if (result < 0) { std::stringstream ss; - ss << "write file failed. " << "(BE: " << BackendOptions::get_localhost() << ")" - << "namenode:" << _namenode << " path:" << _path - << ", err: " << strerror(errno); + ss << "write file failed. " + << "(BE: " << BackendOptions::get_localhost() << ")" + << "namenode:" << _namenode << " path:" << _path << ", err: " << strerror(errno); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } - *written_len = (unsigned int) result; + *written_len = (unsigned int)result; return Status::OK(); } @@ -115,9 +114,9 @@ Status HDFSWriter::close() { int result = hdfsFlush(_hdfs_fs, _hdfs_file); if (result == -1) { std::stringstream ss; - ss << "failed to flush hdfs file. " << "(BE: " << BackendOptions::get_localhost() << ")" - << "namenode:" << _namenode << " path:" << _path - << ", err: " << strerror(errno); + ss << "failed to flush hdfs file. " + << "(BE: " << BackendOptions::get_localhost() << ")" + << "namenode:" << _namenode << " path:" << _path << ", err: " << strerror(errno); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } @@ -201,4 +200,4 @@ Status HDFSWriter::_parse_properties(std::map& prop) { return Status::OK(); } -}// end namespace doris +} // end namespace doris diff --git a/be/src/exec/hdfs_writer.h b/be/src/exec/hdfs_writer.h index a3f17ec166..8bc9060c2a 100644 --- a/be/src/exec/hdfs_writer.h +++ b/be/src/exec/hdfs_writer.h @@ -26,7 +26,6 @@ namespace doris { class HDFSWriter : public FileWriter { - public: HDFSWriter(std::map& properties, const std::string& path); ~HDFSWriter(); @@ -54,4 +53,4 @@ private: bool _closed = false; }; -} +} // namespace doris diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp index a12673a1b5..2f218ca399 100644 --- a/be/src/exec/intersect_node.cpp +++ b/be/src/exec/intersect_node.cpp @@ -60,8 +60,7 @@ Status IntersectNode::open(RuntimeState* state) { _valid_element_in_hash_tbl = 0; // probe - _probe_batch.reset( - new RowBatch(child(i)->row_desc(), state->batch_size())); + _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp index 945afde21d..9a58c0fae7 100644 --- a/be/src/exec/json_scanner.cpp +++ b/be/src/exec/json_scanner.cpp @@ -65,7 +65,7 @@ Status JsonScanner::open() { return BaseScanner::open(); } -Status JsonScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool *fill_tuple) { +Status JsonScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { SCOPED_TIMER(_read_timer); // Get one line while (!_scanner_eof) { @@ -87,7 +87,7 @@ Status JsonScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool bool is_empty_row = false; RETURN_IF_ERROR(_cur_json_reader->read_json_row(_src_tuple, _src_slot_descs, tuple_pool, - &is_empty_row, &_cur_reader_eof)); + &is_empty_row, &_cur_reader_eof)); if (is_empty_row) { // Read empty row, just continue @@ -157,8 +157,8 @@ Status JsonScanner::open_file_reader() { break; } case TFileType::FILE_S3: { - BufferedReader* s3_reader = - new BufferedReader(_profile, new S3Reader(_params.properties, range.path, start_offset)); + BufferedReader* s3_reader = new BufferedReader( + _profile, new S3Reader(_params.properties, range.path, start_offset)); RETURN_IF_ERROR(s3_reader->open()); _cur_file_reader = s3_reader; break; @@ -196,8 +196,8 @@ Status JsonScanner::open_line_reader() { } else { _skip_next_line = false; } - _cur_line_reader = new PlainTextLineReader(_profile, _cur_file_reader, nullptr, - size, _line_delimiter, _line_delimiter_length); + _cur_line_reader = new PlainTextLineReader(_profile, _cur_file_reader, nullptr, size, + _line_delimiter, _line_delimiter_length); _cur_reader_eof = false; return Status::OK(); } @@ -236,8 +236,9 @@ Status JsonScanner::open_json_reader() { new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string, fuzzy_parse, &_scanner_eof, nullptr, _cur_line_reader); } else { - _cur_json_reader = new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string, - fuzzy_parse, &_scanner_eof, _cur_file_reader); + _cur_json_reader = + new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string, + fuzzy_parse, &_scanner_eof, _cur_file_reader); } RETURN_IF_ERROR(_cur_json_reader->init(jsonpath, json_root)); @@ -280,9 +281,8 @@ rapidjson::Value::ConstValueIterator JsonDataInternal::get_next() { ////// class JsonReader JsonReader::JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, - bool strip_outer_array, bool num_as_string,bool fuzzy_parse, - bool* scanner_eof, - FileReader* file_reader, LineReader* line_reader) + bool strip_outer_array, bool num_as_string, bool fuzzy_parse, + bool* scanner_eof, FileReader* file_reader, LineReader* line_reader) : _handle_json_callback(nullptr), _next_line(0), _total_lines(0), @@ -402,8 +402,10 @@ Status JsonReader::_parse_json_doc(size_t* size, bool* eof) { if (has_parse_error) { fmt::memory_buffer error_msg; fmt::format_to(error_msg, "Parse json data for JsonDoc failed. code: {}, error info: {}", - _origin_json_doc.GetParseError(), rapidjson::GetParseError_En(_origin_json_doc.GetParseError())); - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return std::string((char*)json_str, *size); }, + _origin_json_doc.GetParseError(), + rapidjson::GetParseError_En(_origin_json_doc.GetParseError())); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return std::string((char*)json_str, *size); }, [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); _counter->num_rows_filtered++; if (*_scanner_eof) { @@ -423,7 +425,8 @@ Status JsonReader::_parse_json_doc(size_t* size, bool* eof) { if (_json_doc == nullptr) { fmt::memory_buffer error_msg; fmt::format_to(error_msg, "{}", "JSON Root not found."); - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(_origin_json_doc); }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(_origin_json_doc); }, [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); _counter->num_rows_filtered++; if (*_scanner_eof) { @@ -439,8 +442,10 @@ Status JsonReader::_parse_json_doc(size_t* size, bool* eof) { if (_json_doc->IsArray() && !_strip_outer_array) { fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "JSON data is array-object, `strip_outer_array` must be TRUE."); - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(_origin_json_doc); }, + fmt::format_to(error_msg, "{}", + "JSON data is array-object, `strip_outer_array` must be TRUE."); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(_origin_json_doc); }, [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); _counter->num_rows_filtered++; if (*_scanner_eof) { @@ -453,8 +458,10 @@ Status JsonReader::_parse_json_doc(size_t* size, bool* eof) { if (!_json_doc->IsArray() && _strip_outer_array) { fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "JSON data is not an array-object, `strip_outer_array` must be FALSE."); - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(_origin_json_doc); }, + fmt::format_to(error_msg, "{}", + "JSON data is not an array-object, `strip_outer_array` must be FALSE."); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(_origin_json_doc); }, [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); _counter->num_rows_filtered++; if (*_scanner_eof) { @@ -495,8 +502,8 @@ void JsonReader::_fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* me } Status JsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator value, - SlotDescriptor* desc, Tuple* tuple, MemPool* tuple_pool, - bool* valid) { + SlotDescriptor* desc, Tuple* tuple, MemPool* tuple_pool, + bool* valid) { const char* str_value = nullptr; uint8_t tmp_buf[128] = {0}; int32_t wbytes = 0; @@ -533,12 +540,16 @@ Status JsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator val if (desc->is_nullable()) { tuple->set_null(desc->null_indicator_offset()); } else { - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(*value); }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(*value); }, [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "Json value is null, but the column `{}` is not nullable.", desc->col_name()); - return fmt::to_string(error_msg); - }, _scanner_eof)); + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, + "Json value is null, but the column `{}` is not nullable.", + desc->col_name()); + return fmt::to_string(error_msg); + }, + _scanner_eof)); _counter->num_rows_filtered++; *valid = false; return Status::OK(); @@ -559,12 +570,13 @@ Status JsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator val // set valid to false and return OK if we met an invalid row. // return other status if encounter other problmes. Status JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, - const std::vector& slot_descs, - MemPool* tuple_pool, bool* valid) { + const std::vector& slot_descs, + MemPool* tuple_pool, bool* valid) { if (!objectValue.IsObject()) { // Here we expect the incoming `objectValue` to be a Json Object, such as {"key" : "value"}, // not other type of Json format. - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(objectValue); }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, [&]() -> std::string { return "Expect json object value"; }, _scanner_eof)); _counter->num_rows_filtered++; *valid = false; // current row is invalid @@ -594,12 +606,17 @@ Status JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, tuple->set_null(v->null_indicator_offset()); nullcount++; } else { - RETURN_IF_ERROR( _state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(objectValue); }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "The column `{}` is not nullable, but it's not found in jsondata.", v->col_name()); - return fmt::to_string(error_msg); - }, _scanner_eof)); + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, + "The column `{}` is not nullable, but it's not found in " + "jsondata.", + v->col_name()); + return fmt::to_string(error_msg); + }, + _scanner_eof)); _counter->num_rows_filtered++; *valid = false; // current row is invalid break; @@ -608,8 +625,10 @@ Status JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, } if (nullcount == slot_descs.size()) { - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(objectValue); }, - [&]() -> std::string { return "All fields is null, this is a invalid row."; }, _scanner_eof)); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, + [&]() -> std::string { return "All fields is null, this is a invalid row."; }, + _scanner_eof)); _counter->num_rows_filtered++; *valid = false; return Status::OK(); @@ -636,8 +655,8 @@ Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vectorSize(); if (_total_lines == 0) { // may be passing an empty json, such as "[]" - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(*_json_doc); }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(*_json_doc); }, [&]() -> std::string { return "Empty json line"; }, _scanner_eof)); _counter->num_rows_filtered++; if (*_scanner_eof) { @@ -708,7 +728,8 @@ Status JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemP bool wrap_explicitly = false; if (LIKELY(i < _parsed_jsonpaths.size())) { json_values = JsonFunctions::get_json_array_from_parsed_json( - _parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator(), &wrap_explicitly); + _parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator(), + &wrap_explicitly); } if (json_values == nullptr) { @@ -717,12 +738,17 @@ Status JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemP tuple->set_null(slot_descs[i]->null_indicator_offset()); nullcount++; } else { - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(objectValue); }, + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "The column `{}` is not nullable, but it's not found in jsondata.", slot_descs[i]->col_name()); - return fmt::to_string(error_msg); - }, _scanner_eof)); + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, + "The column `{}` is not nullable, but it's not found in " + "jsondata.", + slot_descs[i]->col_name()); + return fmt::to_string(error_msg); + }, + _scanner_eof)); _counter->num_rows_filtered++; *valid = false; // current row is invalid break; @@ -735,15 +761,20 @@ Status JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemP // if json_values' size > 1, it means we just match an array, not a wrapped one, so no need to unwrap. json_values = &((*json_values)[0]); } - RETURN_IF_ERROR(_write_data_to_tuple(json_values, slot_descs[i], tuple, tuple_pool, valid)); + RETURN_IF_ERROR( + _write_data_to_tuple(json_values, slot_descs[i], tuple, tuple_pool, valid)); if (!(*valid)) { break; } } } if (nullcount == column_num) { - RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string { return _print_json_value(objectValue); }, - [&]() -> std::string { return "All fields is null or not matched, this is a invalid row."; }, _scanner_eof)); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, + [&]() -> std::string { + return "All fields is null or not matched, this is a invalid row."; + }, + _scanner_eof)); _counter->num_rows_filtered++; *valid = false; } @@ -796,7 +827,8 @@ Status JsonReader::_handle_nested_complex_json(Tuple* tuple, */ Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof) { + MemPool* tuple_pool, bool* is_empty_row, + bool* eof) { do { if (_next_line >= _total_lines) { size_t size = 0; @@ -804,8 +836,8 @@ Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, if (st.is_data_quality_error()) { continue; // continue to read next } - RETURN_IF_ERROR(st); // terminate if encounter other errors - if (size == 0 || *eof) { // read all data, then return + RETURN_IF_ERROR(st); // terminate if encounter other errors + if (size == 0 || *eof) { // read all data, then return *is_empty_row = true; return Status::OK(); } @@ -820,7 +852,8 @@ Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, } rapidjson::Value& objectValue = (*_json_doc)[_next_line++]; bool valid = true; - RETURN_IF_ERROR(_write_values_by_jsonpath(objectValue, tuple_pool, tuple, slot_descs, &valid)); + RETURN_IF_ERROR( + _write_values_by_jsonpath(objectValue, tuple_pool, tuple, slot_descs, &valid)); if (!valid) { continue; // process next line } @@ -831,7 +864,7 @@ Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, } Status JsonReader::read_json_row(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof) { + MemPool* tuple_pool, bool* is_empty_row, bool* eof) { return (this->*_handle_json_callback)(tuple, slot_descs, tuple_pool, is_empty_row, eof); } diff --git a/be/src/exec/json_scanner.h b/be/src/exec/json_scanner.h index 91528c8351..2f5c9e3b82 100644 --- a/be/src/exec/json_scanner.h +++ b/be/src/exec/json_scanner.h @@ -56,15 +56,14 @@ public: JsonScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, - ScannerCounter* counter); + const std::vector& pre_filter_texprs, ScannerCounter* counter); ~JsonScanner(); // Open this scanner, will initialize information needed Status open() override; // Get next tuple - Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool *fill_tuple) override; + Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override; // Close this scanner void close() override; @@ -121,16 +120,15 @@ struct JsonPath; class JsonReader { public: JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, - bool strip_outer_array, bool num_as_string,bool fuzzy_parse, - bool* scanner_eof, + bool strip_outer_array, bool num_as_string, bool fuzzy_parse, bool* scanner_eof, FileReader* file_reader = nullptr, LineReader* line_reader = nullptr); ~JsonReader(); Status init(const std::string& jsonpath, const std::string& json_root); // must call before use - Status read_json_row(Tuple* tuple, const std::vector& slot_descs, MemPool* tuple_pool, - bool* is_empty_row, bool* eof); + Status read_json_row(Tuple* tuple, const std::vector& slot_descs, + MemPool* tuple_pool, bool* is_empty_row, bool* eof); private: Status (JsonReader::*_handle_json_callback)(Tuple* tuple, @@ -148,12 +146,13 @@ private: const uint8_t* value, int32_t len); Status _parse_json_doc(size_t* size, bool* eof); Status _set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, - const std::vector& slot_descs, MemPool* tuple_pool, - bool* valid); + const std::vector& slot_descs, MemPool* tuple_pool, + bool* valid); Status _write_data_to_tuple(rapidjson::Value::ConstValueIterator value, SlotDescriptor* desc, - Tuple* tuple, MemPool* tuple_pool, bool* valid); - Status _write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, Tuple* tuple, - const std::vector& slot_descs, bool* valid); + Tuple* tuple, MemPool* tuple_pool, bool* valid); + Status _write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, + Tuple* tuple, const std::vector& slot_descs, + bool* valid); std::string _print_json_value(const rapidjson::Value& value); std::string _print_jsonpath(const std::vector& path); @@ -183,7 +182,7 @@ private: rapidjson::Document _origin_json_doc; // origin json document object from parsed json string rapidjson::Value* _json_doc; // _json_doc equals _final_json_doc iff not set `json_root` std::unordered_map _name_map; - + // point to the _scanner_eof of JsonScanner bool* _scanner_eof; }; diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h index ef02727be5..fdb2911633 100644 --- a/be/src/exec/merge_join_node.h +++ b/be/src/exec/merge_join_node.h @@ -66,10 +66,7 @@ private: bool is_eos; TupleRow* current_row; ChildReaderContext(const RowDescriptor& desc, int batch_size) - : batch(desc, batch_size), - row_idx(0), - is_eos(false), - current_row(nullptr) {} + : batch(desc, batch_size), row_idx(0), is_eos(false), current_row(nullptr) {} }; // _left_batch must be cleared before calling get_next(). used cache child(0)'s data // _right_batch must be cleared before calling get_next(). used cache child(1)'s data diff --git a/be/src/exec/mysql_scan_node.h b/be/src/exec/mysql_scan_node.h index ff5fb54f99..3abbf83115 100644 --- a/be/src/exec/mysql_scan_node.h +++ b/be/src/exec/mysql_scan_node.h @@ -63,6 +63,7 @@ private: // The Mysql value is converted into the appropriate target type. Status write_text_slot(char* value, int value_length, SlotDescriptor* slot, RuntimeState* state); + protected: bool _is_init; MysqlScannerParam _my_param; diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index bdbec2a7c6..661ed94844 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -317,10 +317,10 @@ private: bool _is_convertible; }; -typedef std::variant< - ColumnValueRange, ColumnValueRange, ColumnValueRange, - ColumnValueRange, ColumnValueRange<__int128>, ColumnValueRange, - ColumnValueRange, ColumnValueRange, ColumnValueRange> +typedef std::variant, ColumnValueRange, ColumnValueRange, + ColumnValueRange, ColumnValueRange<__int128>, + ColumnValueRange, ColumnValueRange, + ColumnValueRange, ColumnValueRange> ColumnValueRangeType; template diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 778370c16c..e17e26bcd9 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -318,7 +318,8 @@ Status OlapScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eo materialized_batch = _materialized_row_batches.front(); DCHECK(materialized_batch != nullptr); _materialized_row_batches.pop_front(); - _materialized_row_batches_bytes -= materialized_batch->tuple_data_pool()->total_reserved_bytes(); + _materialized_row_batches_bytes -= + materialized_batch->tuple_data_pool()->total_reserved_bytes(); } } @@ -777,7 +778,8 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) { auto tablet_id = scan_range->tablet_id; int32_t schema_hash = strtoul(scan_range->schema_hash.c_str(), nullptr, 10); std::string err; - TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, true, &err); + TabletSharedPtr tablet = + StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, true, &err); if (tablet == nullptr) { std::stringstream ss; ss << "failed to get tablet: " << tablet_id << " with schema hash: " << schema_hash @@ -1212,7 +1214,8 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, std::string is_null_str; // 1. dispose the where pred "A is null" and "A is not null" if (root_expr->is_null_scalar_function(is_null_str) && - normalize_is_null_predicate(root_expr->get_child(0), slot, is_null_str, range)) { + normalize_is_null_predicate(root_expr->get_child(0), slot, is_null_str, + range)) { // if column is key column should push down conjunct storage engine if (is_key_column(slot->col_name())) { filter_conjuncts_index.emplace_back(conj_idx); diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 68a522236c..2478d2b2bf 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -51,8 +51,7 @@ OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool _need_agg_finalize(need_agg_finalize), _version(-1), _mem_tracker(MemTracker::create_tracker( - tracker->limit(), - tracker->label() + ":OlapScanner:" + tls_ctx()->thread_id_str(), + tracker->limit(), tracker->label() + ":OlapScanner:" + tls_ctx()->thread_id_str(), tracker)) {} Status OlapScanner::prepare( @@ -302,7 +301,7 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { } if (tmp_object_pool.size() > 0) { - unused_object_pool.acquire_data(&tmp_object_pool); + unused_object_pool.acquire_data(&tmp_object_pool); } if (unused_object_pool.size() >= config::object_pool_buffer_size) { diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index 5c336dffd6..a05d07a66e 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -93,13 +93,13 @@ public: const std::vector& get_query_slots() const { return _query_slots; } - const std::shared_ptr& mem_tracker() const {return _mem_tracker;} + const std::shared_ptr& mem_tracker() const { return _mem_tracker; } protected: - Status _init_tablet_reader_params(const std::vector& key_ranges, - const std::vector& filters, - const std::vector>>& - bloom_filters); + Status _init_tablet_reader_params( + const std::vector& key_ranges, const std::vector& filters, + const std::vector>>& + bloom_filters); Status _init_return_columns(); void _convert_row_to_tuple(Tuple* tuple); diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index fbefe1cb40..fdda223c1a 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -119,8 +119,7 @@ ORCScanner::ORCScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, - ScannerCounter* counter) + const std::vector& pre_filter_texprs, ScannerCounter* counter) : BaseScanner(state, profile, params, pre_filter_texprs, counter), _ranges(ranges), _broker_addresses(broker_addresses), @@ -154,7 +153,7 @@ Status ORCScanner::open() { return Status::OK(); } -Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple ) { +Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { try { SCOPED_TIMER(_read_timer); // Get one line @@ -402,20 +401,21 @@ Status ORCScanner::open_next_reader() { if (range.__isset.file_size) { file_size = range.file_size; } - file_reader.reset(new BufferedReader(_profile, new BrokerReader(_state->exec_env(), _broker_addresses, - _params.properties, range.path, range.start_offset, - file_size))); + file_reader.reset(new BufferedReader( + _profile, + new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties, + range.path, range.start_offset, file_size))); break; } case TFileType::FILE_S3: { - file_reader.reset(new BufferedReader(_profile, - new S3Reader(_params.properties, range.path, range.start_offset))); + file_reader.reset(new BufferedReader( + _profile, new S3Reader(_params.properties, range.path, range.start_offset))); break; } case TFileType::FILE_HDFS: { #if defined(__x86_64__) - file_reader.reset(new HdfsFileReader( - range.hdfs_params, range.path, range.start_offset)); + file_reader.reset( + new HdfsFileReader(range.hdfs_params, range.path, range.start_offset)); break; #else return Status::InternalError("HdfsFileReader do not support on non x86 platform"); diff --git a/be/src/exec/orc_scanner.h b/be/src/exec/orc_scanner.h index 86b73b6b99..31449bb0d3 100644 --- a/be/src/exec/orc_scanner.h +++ b/be/src/exec/orc_scanner.h @@ -30,8 +30,7 @@ public: ORCScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, - ScannerCounter* counter); + const std::vector& pre_filter_texprs, ScannerCounter* counter); ~ORCScanner() override; @@ -39,7 +38,7 @@ public: Status open() override; // Get next tuple - Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool *fill_tuple) override; + Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override; // Close this scanner void close() override; diff --git a/be/src/exec/plain_binary_line_reader.cpp b/be/src/exec/plain_binary_line_reader.cpp index e680e15023..9cf1ff473f 100644 --- a/be/src/exec/plain_binary_line_reader.cpp +++ b/be/src/exec/plain_binary_line_reader.cpp @@ -22,16 +22,13 @@ namespace doris { -PlainBinaryLineReader::PlainBinaryLineReader(FileReader* file_reader) - : _file_reader(file_reader) { -} +PlainBinaryLineReader::PlainBinaryLineReader(FileReader* file_reader) : _file_reader(file_reader) {} PlainBinaryLineReader::~PlainBinaryLineReader() { close(); } -void PlainBinaryLineReader::close() { -} +void PlainBinaryLineReader::close() {} Status PlainBinaryLineReader::read_line(const uint8_t** ptr, size_t* size, bool* eof) { std::unique_ptr file_buf; diff --git a/be/src/exec/schema_scan_node.h b/be/src/exec/schema_scan_node.h index f45086c78f..2439da04f7 100644 --- a/be/src/exec/schema_scan_node.h +++ b/be/src/exec/schema_scan_node.h @@ -62,6 +62,7 @@ private: void debug_string(int indentation_level, std::stringstream* out) const override; // Copy one row from schema table to input tuple void copy_one_row(); + protected: bool _is_init; const std::string _table_name; diff --git a/be/src/exec/schema_scanner/schema_statistics_scanner.cpp b/be/src/exec/schema_scanner/schema_statistics_scanner.cpp index 995178edc7..26ef280214 100644 --- a/be/src/exec/schema_scanner/schema_statistics_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_statistics_scanner.cpp @@ -22,31 +22,28 @@ namespace doris { SchemaScanner::ColumnDesc SchemaStatisticsScanner::_s_cols_statistics[] = { - // name, type, size, is_null - { "TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true }, - { "TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false }, - { "TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false }, - { "NON_UNIQUE", TYPE_BIGINT, sizeof(int64_t), false }, - { "INDEX_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false }, - { "INDEX_NAME", TYPE_VARCHAR, sizeof(StringValue), false }, - { "SEQ_IN_INDEX", TYPE_BIGINT, sizeof(int64_t), false }, - { "COLUMN_NAME", TYPE_VARCHAR, sizeof(StringValue), false }, - { "COLLATION", TYPE_VARCHAR, sizeof(StringValue), true }, - { "CARDINALITY", TYPE_BIGINT, sizeof(int64_t), true }, - { "SUB_PART", TYPE_BIGINT, sizeof(int64_t), true }, - { "PACKED", TYPE_VARCHAR, sizeof(StringValue), true }, - { "NULLABLE", TYPE_VARCHAR, sizeof(StringValue), false }, - { "INDEX_TYPE", TYPE_VARCHAR, sizeof(StringValue), false }, - { "COMMENT", TYPE_VARCHAR, sizeof(StringValue), true }, + // name, type, size, is_null + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"NON_UNIQUE", TYPE_BIGINT, sizeof(int64_t), false}, + {"INDEX_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, + {"INDEX_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"SEQ_IN_INDEX", TYPE_BIGINT, sizeof(int64_t), false}, + {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLLATION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"CARDINALITY", TYPE_BIGINT, sizeof(int64_t), true}, + {"SUB_PART", TYPE_BIGINT, sizeof(int64_t), true}, + {"PACKED", TYPE_VARCHAR, sizeof(StringValue), true}, + {"NULLABLE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"INDEX_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COMMENT", TYPE_VARCHAR, sizeof(StringValue), true}, }; -SchemaStatisticsScanner::SchemaStatisticsScanner() : - SchemaScanner(_s_cols_statistics, sizeof(_s_cols_statistics) / sizeof(SchemaScanner::ColumnDesc)) { - -} +SchemaStatisticsScanner::SchemaStatisticsScanner() + : SchemaScanner(_s_cols_statistics, + sizeof(_s_cols_statistics) / sizeof(SchemaScanner::ColumnDesc)) {} -SchemaStatisticsScanner::~SchemaStatisticsScanner() { +SchemaStatisticsScanner::~SchemaStatisticsScanner() {} -} - -} +} // namespace doris diff --git a/be/src/exec/schema_scanner/schema_statistics_scanner.h b/be/src/exec/schema_scanner/schema_statistics_scanner.h index fd5a8e0c18..c9fdd5dfaf 100644 --- a/be/src/exec/schema_scanner/schema_statistics_scanner.h +++ b/be/src/exec/schema_scanner/schema_statistics_scanner.h @@ -29,8 +29,7 @@ public: private: static SchemaScanner::ColumnDesc _s_cols_statistics[]; - }; -} +} // namespace doris #endif // DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_SCHEMA_STATISTICS_SCANNER_H diff --git a/be/src/exec/set_operation_node.cpp b/be/src/exec/set_operation_node.cpp index 4b4ed9f828..6d0ee3676d 100644 --- a/be/src/exec/set_operation_node.cpp +++ b/be/src/exec/set_operation_node.cpp @@ -140,7 +140,8 @@ Status SetOperationNode::open(RuntimeState* state) { RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); SCOPED_TIMER(_runtime_profile->total_time_counter()); SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("SetOperation, while constructing the hash table."); + SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB( + "SetOperation, while constructing the hash table."); RETURN_IF_CANCELLED(state); // open result expr lists. for (const std::vector& exprs : _child_expr_lists) { diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp index aeeb17e05c..01a023b07b 100644 --- a/be/src/exec/table_function_node.cpp +++ b/be/src/exec/table_function_node.cpp @@ -280,13 +280,13 @@ Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, boo memcpy(tuple_ptr, child_tuple, parent_tuple_desc->byte_size()); // only deep copy the child slot if it is selected and is var len (Eg: string, bitmap, hll) for (int j = 0; j < _child_slot_sizes[i]; ++j) { - SlotDescriptor *child_slot_desc = child_tuple_desc->slots()[j]; - SlotDescriptor *parent_slot_desc = parent_tuple_desc->slots()[j]; + SlotDescriptor* child_slot_desc = child_tuple_desc->slots()[j]; + SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[j]; if (_output_slot_ids[parent_slot_desc->id()] && - !child_tuple->is_null(child_slot_desc->null_indicator_offset()) - && child_slot_desc->type().is_string_type()) { - void *dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); + !child_tuple->is_null(child_slot_desc->null_indicator_offset()) && + child_slot_desc->type().is_string_type()) { + void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset()); RawValue::write(child_tuple->get_slot(child_slot_desc->tuple_offset()), dest_slot, parent_slot_desc->type(), row_batch->tuple_data_pool()); diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index edc7d0eca4..c8f3b5ef16 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -24,7 +24,6 @@ #include "util/string_parser.hpp" #include "util/time.h" - namespace doris { void OlapTableIndexSchema::to_protobuf(POlapTableIndexSchema* pindex) const { @@ -161,9 +160,7 @@ std::string OlapTablePartition::debug_string(TupleDescriptor* tuple_desc) const OlapTablePartitionParam::OlapTablePartitionParam(std::shared_ptr schema, const TOlapTablePartitionParam& t_param) - : _schema(schema), - _t_param(t_param), - _mem_pool(new MemPool("OlapTablePartitionParam")) {} + : _schema(schema), _t_param(t_param), _mem_pool(new MemPool("OlapTablePartitionParam")) {} OlapTablePartitionParam::~OlapTablePartitionParam() {} @@ -293,9 +290,11 @@ Status OlapTablePartitionParam::init() { return Status::OK(); } -bool OlapTablePartitionParam::find_partition(Tuple* tuple, const OlapTablePartition** partition) const { +bool OlapTablePartitionParam::find_partition(Tuple* tuple, + const OlapTablePartition** partition) const { const TOlapTablePartition& t_part = _t_param.partitions[0]; - auto it = t_part.__isset.in_keys ? _partitions_map->find(tuple) : _partitions_map->upper_bound(tuple); + auto it = t_part.__isset.in_keys ? _partitions_map->find(tuple) + : _partitions_map->upper_bound(tuple); if (it == _partitions_map->end()) { return false; } @@ -306,7 +305,8 @@ bool OlapTablePartitionParam::find_partition(Tuple* tuple, const OlapTablePartit return false; } -uint32_t OlapTablePartitionParam::find_tablet(Tuple* tuple, const OlapTablePartition& partition) const { +uint32_t OlapTablePartitionParam::find_tablet(Tuple* tuple, + const OlapTablePartition& partition) const { return _compute_tablet_index(tuple, partition.num_buckets); } @@ -412,13 +412,14 @@ std::string OlapTablePartitionParam::debug_string() const { } VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr& schema, - const TOlapTablePartitionParam& t_param) + const TOlapTablePartitionParam& t_param) : _schema(schema), _t_param(t_param), _slots(_schema->tuple_desc()->slots()), _mem_tracker(MemTracker::create_virtual_tracker(-1, "OlapTablePartitionParam")) { for (auto slot : _slots) { - _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()}); + _partition_block.insert( + {slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()}); } } @@ -432,7 +433,9 @@ Status VOlapTablePartitionParam::init() { slot_column_names.emplace_back(slot_desc->col_name()); } - auto find_slot_locs = [&slot_column_names](const std::string& slot_name, std::vector& locs, const std::string& column_type) { + auto find_slot_locs = [&slot_column_names](const std::string& slot_name, + std::vector& locs, + const std::string& column_type) { auto it = std::find(slot_column_names.begin(), slot_column_names.end(), slot_name); if (it == slot_column_names.end()) { return Status::InternalError(column_type + " column not found, column =" + slot_name); @@ -447,8 +450,9 @@ Status VOlapTablePartitionParam::init() { } } - _partitions_map.reset(new std::map( - VOlapTablePartKeyComparator(_partition_slot_locs))); + _partitions_map.reset( + new std::map( + VOlapTablePartKeyComparator(_partition_slot_locs))); if (_t_param.__isset.distributed_columns) { for (auto& col : _t_param.distributed_columns) { RETURN_IF_ERROR(find_slot_locs(col, _distributed_slot_locs, "distributed")); @@ -467,7 +471,8 @@ Status VOlapTablePartitionParam::init() { auto column = key->first->get_by_position(_distributed_slot_locs[i]).column; auto val = column->get_data_at(key->second); if (val.data != nullptr) { - hash_val = RawValue::zlib_crc32(val.data, val.size, slot_desc->type().type, hash_val); + hash_val = RawValue::zlib_crc32(val.data, val.size, slot_desc->type().type, + hash_val); } else { // NULL is treat as 0 when hash static const int INT_VALUE = 0; @@ -498,7 +503,8 @@ Status VOlapTablePartitionParam::init() { } } else { for (const auto& keys : t_part.in_keys) { - RETURN_IF_ERROR(_create_partition_keys(keys, &part->in_keys.emplace_back(&_partition_block, -1))); + RETURN_IF_ERROR(_create_partition_keys( + keys, &part->in_keys.emplace_back(&_partition_block, -1))); } } @@ -541,8 +547,10 @@ Status VOlapTablePartitionParam::init() { return Status::OK(); } -bool VOlapTablePartitionParam::find_partition(BlockRow* block_row, const VOlapTablePartition** partition) const { - auto it = _is_in_partition ? _partitions_map->find(block_row) : _partitions_map->upper_bound(block_row); +bool VOlapTablePartitionParam::find_partition(BlockRow* block_row, + const VOlapTablePartition** partition) const { + auto it = _is_in_partition ? _partitions_map->find(block_row) + : _partitions_map->upper_bound(block_row); if (it == _partitions_map->end()) { return false; } @@ -553,75 +561,79 @@ bool VOlapTablePartitionParam::find_partition(BlockRow* block_row, const VOlapTa return false; } -uint32_t VOlapTablePartitionParam::find_tablet(BlockRow* block_row, const VOlapTablePartition& partition) const { +uint32_t VOlapTablePartitionParam::find_tablet(BlockRow* block_row, + const VOlapTablePartition& partition) const { return _compute_tablet_index(block_row, partition.num_buckets); } Status VOlapTablePartitionParam::_create_partition_keys(const std::vector& t_exprs, - BlockRow* part_key) { + BlockRow* part_key) { for (int i = 0; i < t_exprs.size(); i++) { - RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, - _partition_slot_locs[i])); + RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, _partition_slot_locs[i])); } return Status::OK(); } Status VOlapTablePartitionParam::_create_partition_key(const TExprNode& t_expr, BlockRow* part_key, - uint16_t pos) { + uint16_t pos) { auto column = std::move(*part_key->first->get_by_position(pos).column).mutate(); switch (t_expr.node_type) { case TExprNodeType::DATE_LITERAL: { vectorized::VecDateTimeValue dt; - if (!dt.from_date_str( - t_expr.date_literal.value.c_str(), t_expr.date_literal.value.size())) { + if (!dt.from_date_str(t_expr.date_literal.value.c_str(), + t_expr.date_literal.value.size())) { std::stringstream ss; ss << "invalid date literal in partition column, date=" << t_expr.date_literal; return Status::InternalError(ss.str()); } - column->insert_data(reinterpret_cast(&dt), 0); + column->insert_data(reinterpret_cast(&dt), 0); break; } case TExprNodeType::INT_LITERAL: { switch (t_expr.type.types[0].scalar_type.type) { - case TPrimitiveType::TINYINT: { - int8_t value = t_expr.int_literal.value; - column->insert_data(reinterpret_cast(&value), 0); - break; - } - case TPrimitiveType::SMALLINT: { - int16_t value = t_expr.int_literal.value; - column->insert_data(reinterpret_cast(&value), 0); - break; - } - case TPrimitiveType::INT: { - int32_t value = t_expr.int_literal.value; - column->insert_data(reinterpret_cast(&value), 0); - break; - } - default: - int64_t value = t_expr.int_literal.value; - column->insert_data(reinterpret_cast(&value), 0); + case TPrimitiveType::TINYINT: { + int8_t value = t_expr.int_literal.value; + column->insert_data(reinterpret_cast(&value), 0); + break; + } + case TPrimitiveType::SMALLINT: { + int16_t value = t_expr.int_literal.value; + column->insert_data(reinterpret_cast(&value), 0); + break; + } + case TPrimitiveType::INT: { + int32_t value = t_expr.int_literal.value; + column->insert_data(reinterpret_cast(&value), 0); + break; + } + default: + int64_t value = t_expr.int_literal.value; + column->insert_data(reinterpret_cast(&value), 0); } break; - } case TExprNodeType::LARGE_INT_LITERAL: { + } + case TExprNodeType::LARGE_INT_LITERAL: { StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - __int128 value = StringParser::string_to_int<__int128>(t_expr.large_int_literal.value.c_str(), - t_expr.large_int_literal.value.size(), - &parse_result); + __int128 value = StringParser::string_to_int<__int128>( + t_expr.large_int_literal.value.c_str(), t_expr.large_int_literal.value.size(), + &parse_result); if (parse_result != StringParser::PARSE_SUCCESS) { value = MAX_INT128; } - column->insert_data(reinterpret_cast(&value), 0); + column->insert_data(reinterpret_cast(&value), 0); break; - } case TExprNodeType::STRING_LITERAL: { + } + case TExprNodeType::STRING_LITERAL: { int len = t_expr.string_literal.value.size(); const char* str_val = t_expr.string_literal.value.c_str(); column->insert_data(str_val, len); break; - } case TExprNodeType::BOOL_LITERAL: { - column->insert_data(reinterpret_cast(&t_expr.bool_literal.value), 0); + } + case TExprNodeType::BOOL_LITERAL: { + column->insert_data(reinterpret_cast(&t_expr.bool_literal.value), 0); break; - } default: { + } + default: { std::stringstream ss; ss << "unsupported partition column node type, type=" << t_expr.node_type; return Status::InternalError(ss.str()); diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index bdd3cea5a0..3aad1f6c7c 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -216,14 +216,13 @@ struct VOlapTablePartition { int64_t num_buckets = 0; std::vector indexes; - VOlapTablePartition(vectorized::Block* partition_block): - start_key{partition_block, -1}, end_key{partition_block, -1} {}; + VOlapTablePartition(vectorized::Block* partition_block) + : start_key {partition_block, -1}, end_key {partition_block, -1} {}; }; class VOlapTablePartKeyComparator { public: - VOlapTablePartKeyComparator(const std::vector& slot_locs) - : _slot_locs(slot_locs) {} + VOlapTablePartKeyComparator(const std::vector& slot_locs) : _slot_locs(slot_locs) {} // return true if lhs < rhs // 'row' is -1 mean @@ -253,7 +252,7 @@ private: class VOlapTablePartitionParam { public: VOlapTablePartitionParam(std::shared_ptr& schema, - const TOlapTablePartitionParam& param); + const TOlapTablePartitionParam& param); ~VOlapTablePartitionParam(); @@ -304,7 +303,6 @@ private: uint32_t _mem_usage = 0; }; - using TabletLocation = TTabletLocation; // struct TTabletLocation { // 1: required i64 tablet_id diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index b195591de0..0e89069cb5 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -248,13 +248,15 @@ Status NodeChannel::open_wait() { _add_batches_finished = true; } } else { - _cancel_with_msg(fmt::format("{}, add batch req success but status isn't ok, err: {}", - channel_info(), status.get_error_msg())); + _cancel_with_msg( + fmt::format("{}, add batch req success but status isn't ok, err: {}", + channel_info(), status.get_error_msg())); } if (result.has_execution_time_us()) { _add_batch_counter.add_batch_execution_time_us += result.execution_time_us(); - _add_batch_counter.add_batch_wait_execution_time_us += result.wait_execution_time_us(); + _add_batch_counter.add_batch_wait_execution_time_us += + result.wait_execution_time_us(); _add_batch_counter.add_batch_num++; } }); @@ -643,8 +645,7 @@ OlapTableSink::OlapTableSink(ObjectPool* pool, const RowDescriptor& row_desc, *status = Expr::create_expr_trees(_pool, texprs, &_output_expr_ctxs); } _name = "OlapTableSink"; - } - else { + } else { *status = Status::OK(); } _transfer_data_by_brpc_attachment = config::transfer_data_by_brpc_attachment; @@ -714,7 +715,8 @@ Status OlapTableSink::prepare(RuntimeState* state) { if (!_is_vectorized) { // Prepare the exprs to run. - RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _input_row_desc, _expr_mem_tracker)); + RETURN_IF_ERROR( + Expr::prepare(_output_expr_ctxs, state, _input_row_desc, _expr_mem_tracker)); } // get table's tuple descriptor diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 84f069dce7..53a7ff3b42 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -57,7 +57,7 @@ class TExpr; namespace vectorized { class Block; class MutableBlock; -} +} // namespace vectorized namespace stream_load { class OlapTableSink; @@ -91,7 +91,7 @@ struct AddBatchCounter { // So using create() to get the closure pointer is recommended. We can delete the closure ptr before the capture vars destruction. // Delete this point is safe, don't worry about RPC callback will run after ReusableClosure deleted. template -class ReusableClosure final: public google::protobuf::Closure { +class ReusableClosure final : public google::protobuf::Closure { public: ReusableClosure() : cid(INVALID_BTHREAD_ID) {} ~ReusableClosure() override { @@ -132,9 +132,7 @@ public: return _packet_in_flight.compare_exchange_strong(value, true); } - void clear_in_flight() { - _packet_in_flight = false; - } + void clear_in_flight() { _packet_in_flight = false; } bool is_packet_in_flight() { return _packet_in_flight; } @@ -229,9 +227,7 @@ public: void clear_all_batches(); - virtual void clear_all_blocks() { - LOG(FATAL) << "NodeChannel::clear_all_blocks not supported"; - } + virtual void clear_all_blocks() { LOG(FATAL) << "NodeChannel::clear_all_blocks not supported"; } std::string channel_info() const { return fmt::format("{}, {}, node={}:{}", _name, _load_info, _node_info.host, @@ -282,7 +278,7 @@ protected: // limit _pending_batches size std::atomic _pending_batches_bytes {0}; size_t _max_pending_batches_bytes {10 * 1024 * 1024}; - std::mutex _pending_batches_lock; // reuse for vectorized + std::mutex _pending_batches_lock; // reuse for vectorized std::atomic _pending_batches_num {0}; // reuse for vectorized std::shared_ptr _stub = nullptr; @@ -327,8 +323,8 @@ private: class IndexChannel { public: - IndexChannel(OlapTableSink* parent, int64_t index_id, bool is_vec) : - _parent(parent), _index_id(index_id), _is_vectorized(is_vec) { + IndexChannel(OlapTableSink* parent, int64_t index_id, bool is_vec) + : _parent(parent), _index_id(index_id), _is_vectorized(is_vec) { _index_channel_tracker = MemTracker::create_tracker(-1, "IndexChannel"); } ~IndexChannel() = default; diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp index 1eacaadfb9..2799fada2b 100644 --- a/be/src/exec/text_converter.hpp +++ b/be/src/exec/text_converter.hpp @@ -186,7 +186,8 @@ inline bool TextConverter::write_column(const SlotDescriptor* slot_desc, // Parse the raw-text data. Translate the text string to internal format. switch (slot_desc->type().type) { case TYPE_HLL: { - reinterpret_cast(col_ptr)->get_data().emplace_back(HyperLogLog(Slice(data, len))); + reinterpret_cast(col_ptr)->get_data().emplace_back( + HyperLogLog(Slice(data, len))); break; } case TYPE_VARCHAR: diff --git a/be/src/exprs/aggregate_functions.cpp b/be/src/exprs/aggregate_functions.cpp index f3772a6078..dbd1b1c818 100644 --- a/be/src/exprs/aggregate_functions.cpp +++ b/be/src/exprs/aggregate_functions.cpp @@ -238,7 +238,6 @@ StringVal AggregateFunctions::percentile_serialize(FunctionContext* ctx, const S } DoubleVal AggregateFunctions::percentile_finalize(FunctionContext* ctx, const StringVal& src) { - PercentileState* percentile = reinterpret_cast(src.ptr); double quantile = percentile->quantile; auto result = percentile->counts.terminate(quantile); @@ -343,7 +342,6 @@ void AggregateFunctions::percentile_approx_merge(FunctionContext* ctx, const Str DoubleVal AggregateFunctions::percentile_approx_finalize(FunctionContext* ctx, const StringVal& src) { - PercentileApproxState* percentile = reinterpret_cast(src.ptr); double quantile = percentile->targetQuantile; double result = percentile->digest->quantile(quantile); @@ -1112,7 +1110,7 @@ void AggregateFunctions::hll_merge(FunctionContext* ctx, const StringVal& src, S DCHECK(!src.is_null); DCHECK_EQ(dst->len, std::pow(2, HLL_COLUMN_PRECISION)); DCHECK_EQ(src.len, std::pow(2, HLL_COLUMN_PRECISION)); - + for (int i = 0; i < src.len; ++i) { dst->ptr[i] = (dst->ptr[i] < src.ptr[i] ? src.ptr[i] : dst->ptr[i]); } @@ -2424,7 +2422,7 @@ struct WindowFunnelState { return 0; } - void merge(WindowFunnelState *other) { + void merge(WindowFunnelState* other) { if (other->events.empty()) { return; } @@ -2453,7 +2451,7 @@ struct WindowFunnelState { events.size() * (sizeof(int64_t) + sizeof(int)); } - void serialize(uint8_t *buf) { + void serialize(uint8_t* buf) { memcpy(buf, &max_event_level, sizeof(int)); buf += sizeof(int); memcpy(buf, &window, sizeof(int64_t)); @@ -2472,7 +2470,7 @@ struct WindowFunnelState { } } - void deserialize(uint8_t *buf) { + void deserialize(uint8_t* buf) { uint64_t size; memcpy(&max_event_level, buf, sizeof(int)); @@ -2509,8 +2507,9 @@ void AggregateFunctions::window_funnel_init(FunctionContext* ctx, StringVal* dst } void AggregateFunctions::window_funnel_update(FunctionContext* ctx, const BigIntVal& window, - const StringVal& mode, const DateTimeVal& timestamp, - int num_cond, const BooleanVal* conds, StringVal* dst) { + const StringVal& mode, const DateTimeVal& timestamp, + int num_cond, const BooleanVal* conds, + StringVal* dst) { DCHECK(dst->ptr != nullptr); DCHECK_EQ(sizeof(WindowFunnelState), dst->len); @@ -2530,8 +2529,7 @@ void AggregateFunctions::window_funnel_update(FunctionContext* ctx, const BigInt } } -StringVal AggregateFunctions::window_funnel_serialize(FunctionContext* ctx, - const StringVal& src) { +StringVal AggregateFunctions::window_funnel_serialize(FunctionContext* ctx, const StringVal& src) { WindowFunnelState* state = reinterpret_cast(src.ptr); int64_t serialized_size = state->serialized_size(); StringVal result(ctx, sizeof(double) + serialized_size); @@ -2542,7 +2540,7 @@ StringVal AggregateFunctions::window_funnel_serialize(FunctionContext* ctx, } void AggregateFunctions::window_funnel_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst) { + StringVal* dst) { DCHECK(dst->ptr != nullptr); DCHECK_EQ(sizeof(WindowFunnelState), dst->len); WindowFunnelState* dst_state = reinterpret_cast(dst->ptr); diff --git a/be/src/exprs/aggregate_functions.h b/be/src/exprs/aggregate_functions.h index eea049cfe1..b995c21d29 100644 --- a/be/src/exprs/aggregate_functions.h +++ b/be/src/exprs/aggregate_functions.h @@ -302,11 +302,11 @@ public: static DecimalV2Val decimalv2_knuth_var_get_value(FunctionContext* ctx, const StringVal& state_sv); static DecimalV2Val decimalv2_knuth_var_pop_get_value(FunctionContext* context, - const StringVal& val); + const StringVal& val); static DecimalV2Val decimalv2_knuth_stddev_get_value(FunctionContext* context, - const StringVal& val); + const StringVal& val); static DecimalV2Val decimalv2_knuth_stddev_pop_get_value(FunctionContext* context, - const StringVal& val); + const StringVal& val); /// ----------------------------- Analytic Functions --------------------------------- /// Analytic functions implement the UDA interface (except Merge(), Serialize()) and are @@ -370,10 +370,9 @@ public: // windowFunnel static void window_funnel_init(FunctionContext* ctx, StringVal* dst); static void window_funnel_update(FunctionContext* ctx, const BigIntVal& window, - const StringVal& mode, const DateTimeVal& timestamp, - int num_cond, const BooleanVal* conds, StringVal* dst); - static void window_funnel_merge(FunctionContext* ctx, const StringVal& src, - StringVal* dst); + const StringVal& mode, const DateTimeVal& timestamp, + int num_cond, const BooleanVal* conds, StringVal* dst); + static void window_funnel_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst); static StringVal window_funnel_serialize(FunctionContext* ctx, const StringVal& src); static IntVal window_funnel_finalize(FunctionContext* ctx, const StringVal& src); diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h index f2cee64823..f77ca1af94 100644 --- a/be/src/exprs/bitmap_function.h +++ b/be/src/exprs/bitmap_function.h @@ -87,9 +87,9 @@ public: static BigIntVal bitmap_or_count(FunctionContext* ctx, const StringVal& lhs, int num_args, const StringVal* bitmap_strs); static BigIntVal bitmap_and_count(FunctionContext* ctx, const StringVal& lhs, int num_args, - const StringVal* bitmap_strs); + const StringVal* bitmap_strs); static BigIntVal bitmap_xor_count(FunctionContext* ctx, const StringVal& lhs, int num_args, - const StringVal* bitmap_strs); + const StringVal* bitmap_strs); static StringVal bitmap_to_string(FunctionContext* ctx, const StringVal& input); // Convert a comma separated string to a Bitmap diff --git a/be/src/exprs/block_bloom_filter_avx_impl.cc b/be/src/exprs/block_bloom_filter_avx_impl.cc index a7eb421c57..e005b4e9c6 100644 --- a/be/src/exprs/block_bloom_filter_avx_impl.cc +++ b/be/src/exprs/block_bloom_filter_avx_impl.cc @@ -48,8 +48,8 @@ void BlockBloomFilter::bucket_insert_avx2(const uint32_t bucket_idx, const uint3 _mm256_zeroupper(); } -bool BlockBloomFilter::bucket_find_avx2(const uint32_t bucket_idx, const uint32_t hash) const - noexcept { +bool BlockBloomFilter::bucket_find_avx2(const uint32_t bucket_idx, + const uint32_t hash) const noexcept { const __m256i mask = make_mark(hash); const __m256i bucket = reinterpret_cast<__m256i*>(_directory)[bucket_idx]; // We should return true if 'bucket' has a one wherever 'mask' does. _mm256_testc_si256 diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h index ae1a8945e8..9632ddb14b 100644 --- a/be/src/exprs/bloomfilter_predicate.h +++ b/be/src/exprs/bloomfilter_predicate.h @@ -322,7 +322,7 @@ public: bool find_olap_engine(const void* data) const override { return dummy.find_olap_engine(*this->_bloom_filter, data); } - + bool find_uint32_t(uint32_t data) const override { return dummy.find(*this->_bloom_filter, data); } diff --git a/be/src/exprs/decimalv2_operators.cpp b/be/src/exprs/decimalv2_operators.cpp index ccc41b9a8a..94747e6d43 100644 --- a/be/src/exprs/decimalv2_operators.cpp +++ b/be/src/exprs/decimalv2_operators.cpp @@ -176,23 +176,23 @@ DateTimeVal DecimalV2Operators::cast_to_date_val(FunctionContext* context, return result; \ } -#define DECIMAL_ARITHMETIC_OP_DIVIDE(FN_NAME, OP) \ -DecimalV2Val DecimalV2Operators::FN_NAME##_decimalv2_val_decimalv2_val( \ - FunctionContext* context, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ - if (v1.is_null || v2.is_null || v2.value() == 0) return DecimalV2Val::null(); \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - DecimalV2Value ir = iv1 OP iv2; \ - DecimalV2Val result; \ - ir.to_decimal_val(&result); \ - return result; \ -} +#define DECIMAL_ARITHMETIC_OP_DIVIDE(FN_NAME, OP) \ + DecimalV2Val DecimalV2Operators::FN_NAME##_decimalv2_val_decimalv2_val( \ + FunctionContext* context, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ + if (v1.is_null || v2.is_null || v2.value() == 0) return DecimalV2Val::null(); \ + DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ + DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ + DecimalV2Value ir = iv1 OP iv2; \ + DecimalV2Val result; \ + ir.to_decimal_val(&result); \ + return result; \ + } -#define DECIMAL_ARITHMETIC_OPS() \ - DECIMAL_ARITHMETIC_OP(add, +); \ - DECIMAL_ARITHMETIC_OP(subtract, -); \ - DECIMAL_ARITHMETIC_OP(multiply, *); \ - DECIMAL_ARITHMETIC_OP_DIVIDE(divide, /); \ +#define DECIMAL_ARITHMETIC_OPS() \ + DECIMAL_ARITHMETIC_OP(add, +); \ + DECIMAL_ARITHMETIC_OP(subtract, -); \ + DECIMAL_ARITHMETIC_OP(multiply, *); \ + DECIMAL_ARITHMETIC_OP_DIVIDE(divide, /); \ DECIMAL_ARITHMETIC_OP_DIVIDE(mod, %); DECIMAL_ARITHMETIC_OPS(); diff --git a/be/src/exprs/encryption_functions.h b/be/src/exprs/encryption_functions.h index 0046da8c12..5ab7d41cb7 100644 --- a/be/src/exprs/encryption_functions.h +++ b/be/src/exprs/encryption_functions.h @@ -44,10 +44,10 @@ static StringCaseUnorderedMap aes_mode_map { {"AES_256_CTR", AES_256_CTR}, {"AES_128_OFB", AES_128_OFB}, {"AES_192_OFB", AES_192_OFB}, {"AES_256_OFB", AES_256_OFB}}; static StringCaseUnorderedMap sm4_mode_map {{"SM4_128_ECB", SM4_128_ECB}, - {"SM4_128_CBC", SM4_128_CBC}, - {"SM4_128_CFB128", SM4_128_CFB128}, - {"SM4_128_OFB", SM4_128_OFB}, - {"SM4_128_CTR", SM4_128_CTR}}; + {"SM4_128_CBC", SM4_128_CBC}, + {"SM4_128_CFB128", SM4_128_CFB128}, + {"SM4_128_OFB", SM4_128_OFB}, + {"SM4_128_CTR", SM4_128_CTR}}; class EncryptionFunctions { public: static void init(); diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index da8a6aab1c..518a69d774 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -39,11 +39,7 @@ namespace doris { ExprContext::ExprContext(Expr* root) - : _root(root), - _is_clone(false), - _prepared(false), - _opened(false), - _closed(false) {} + : _root(root), _is_clone(false), _prepared(false), _opened(false), _closed(false) {} ExprContext::~ExprContext() { DCHECK(!_prepared || _closed); diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h index 06b43e326e..ae9178c23c 100644 --- a/be/src/exprs/new_agg_fn_evaluator.h +++ b/be/src/exprs/new_agg_fn_evaluator.h @@ -248,8 +248,7 @@ private: void Update(const TupleRow* row, Tuple* dst, void* fn); /// Writes the result in src into dst pointed to by dst_slot_desc - void SetDstSlot(const doris_udf::AnyVal* src, const SlotDescriptor& dst_slot_desc, - Tuple* dst); + void SetDstSlot(const doris_udf::AnyVal* src, const SlotDescriptor& dst_slot_desc, Tuple* dst); /// Sets up the arguments to call 'fn'. This converts from the agg-expr signature, /// taking TupleRow to the UDA signature taking AnyVals. Writes the serialize/finalize diff --git a/be/src/exprs/new_in_predicate.h b/be/src/exprs/new_in_predicate.h index b859b33e97..ea37714a0f 100644 --- a/be/src/exprs/new_in_predicate.h +++ b/be/src/exprs/new_in_predicate.h @@ -323,8 +323,8 @@ private: /// The templated function that provides the implementation for all the In() and NotIn() /// functions. template - static doris_udf::BooleanVal templated_in(doris_udf::FunctionContext* context, - const T& val, int num_args, const T* args); + static doris_udf::BooleanVal templated_in(doris_udf::FunctionContext* context, const T& val, + int num_args, const T* args); /// Initializes an SetLookupState in ctx. template diff --git a/be/src/exprs/rpc_fn.cpp b/be/src/exprs/rpc_fn.cpp index 63ddc93dfc..39337e0a6e 100644 --- a/be/src/exprs/rpc_fn.cpp +++ b/be/src/exprs/rpc_fn.cpp @@ -437,14 +437,16 @@ void convert_col_to_pvalue(const vectorized::ColumnPtr& column, if constexpr (nullable) { if (!column->is_null_at(row_num)) { vectorized::VecDateTimeValue v = - vectorized::VecDateTimeValue::create_from_olap_date(column->get_int(row_num)); + vectorized::VecDateTimeValue::create_from_olap_date( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); } } else { vectorized::VecDateTimeValue v = - vectorized::VecDateTimeValue::create_from_olap_date(column->get_int(row_num)); + vectorized::VecDateTimeValue::create_from_olap_date( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); @@ -460,7 +462,8 @@ void convert_col_to_pvalue(const vectorized::ColumnPtr& column, if constexpr (nullable) { if (!column->is_null_at(row_num)) { vectorized::VecDateTimeValue v = - vectorized::VecDateTimeValue::create_from_olap_datetime(column->get_int(row_num)); + vectorized::VecDateTimeValue::create_from_olap_datetime( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); @@ -470,7 +473,8 @@ void convert_col_to_pvalue(const vectorized::ColumnPtr& column, } } else { vectorized::VecDateTimeValue v = - vectorized::VecDateTimeValue::create_from_olap_datetime(column->get_int(row_num)); + vectorized::VecDateTimeValue::create_from_olap_datetime( + column->get_int(row_num)); date_time->set_day(v.day()); date_time->set_month(v.month()); date_time->set_year(v.year()); diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 821b940e4a..f32b1aa1b7 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -329,8 +329,12 @@ public: _filter_id(params->filter_id) {} // for a 'tmp' runtime predicate wrapper // only could called assign method or as a param for merge - RuntimePredicateWrapper(ObjectPool* pool, RuntimeFilterType type, UniqueId fragment_instance_id, uint32_t filter_id) - : _pool(pool), _filter_type(type), _fragment_instance_id(fragment_instance_id), _filter_id(filter_id) {} + RuntimePredicateWrapper(ObjectPool* pool, RuntimeFilterType type, UniqueId fragment_instance_id, + uint32_t filter_id) + : _pool(pool), + _filter_type(type), + _fragment_instance_id(fragment_instance_id), + _filter_id(filter_id) {} // init runtime filter wrapper // alloc memory to init runtime filter function Status init(const RuntimeFilterParams* params) { @@ -453,7 +457,8 @@ public: DCHECK(container != nullptr); DCHECK(_pool != nullptr); DCHECK(prob_expr->root()->type().type == _column_return_type || - (is_string_type(prob_expr->root()->type().type) && is_string_type(_column_return_type))); + (is_string_type(prob_expr->root()->type().type) && + is_string_type(_column_return_type))); auto real_filter_type = get_real_type(); switch (real_filter_type) { @@ -909,9 +914,9 @@ private: uint32_t _filter_id; }; -Status IRuntimeFilter::create(RuntimeState* state, ObjectPool* pool, - const TRuntimeFilterDesc* desc, const TQueryOptions* query_options, - const RuntimeFilterRole role, int node_id, IRuntimeFilter** res) { +Status IRuntimeFilter::create(RuntimeState* state, ObjectPool* pool, const TRuntimeFilterDesc* desc, + const TQueryOptions* query_options, const RuntimeFilterRole role, + int node_id, IRuntimeFilter** res) { *res = pool->add(new IRuntimeFilter(state, pool)); (*res)->set_role(role); UniqueId fragment_instance_id(state->fragment_instance_id()); @@ -1087,7 +1092,8 @@ Status IRuntimeFilter::_create_wrapper(const T* param, ObjectPool* pool, std::unique_ptr* wrapper) { int filter_type = param->request->filter_type(); wrapper->reset(new RuntimePredicateWrapper(pool, get_type(filter_type), - UniqueId(param->request->fragment_id()), param->request->filter_id())); + UniqueId(param->request->fragment_id()), + param->request->filter_id())); switch (filter_type) { case PFilterType::IN_FILTER: { diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index 8d5b433b9b..0f5b32aec3 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -76,8 +76,12 @@ inline std::string to_string(RuntimeFilterType type) { enum class RuntimeFilterRole { PRODUCER = 0, CONSUMER = 1 }; struct RuntimeFilterParams { - RuntimeFilterParams() : filter_type(RuntimeFilterType::UNKNOWN_FILTER), - bloom_filter_size(-1), max_in_num(0), filter_id(0), fragment_instance_id(0, 0) {} + RuntimeFilterParams() + : filter_type(RuntimeFilterType::UNKNOWN_FILTER), + bloom_filter_size(-1), + max_in_num(0), + filter_id(0), + fragment_instance_id(0, 0) {} RuntimeFilterType filter_type; PrimitiveType column_return_type; @@ -91,7 +95,7 @@ struct RuntimeFilterParams { struct UpdateRuntimeFilterParams { const PPublishFilterRequest* request; const char* data; - ObjectPool *pool; + ObjectPool* pool; }; struct MergeRuntimeFilterParams { @@ -123,10 +127,9 @@ public: ~IRuntimeFilter() = default; - static Status create(RuntimeState* state, ObjectPool* pool, - const TRuntimeFilterDesc* desc, const TQueryOptions* query_options, - const RuntimeFilterRole role, int node_id, - IRuntimeFilter** res); + static Status create(RuntimeState* state, ObjectPool* pool, const TRuntimeFilterDesc* desc, + const TQueryOptions* query_options, const RuntimeFilterRole role, + int node_id, IRuntimeFilter** res); // insert data to build filter // only used for producer @@ -177,10 +180,8 @@ public: void signal(); // init filter with desc - Status init_with_desc(const TRuntimeFilterDesc* desc, - const TQueryOptions* options, - UniqueId fragment_id = UniqueId(0, 0), - int node_id = -1); + Status init_with_desc(const TRuntimeFilterDesc* desc, const TQueryOptions* options, + UniqueId fragment_id = UniqueId(0, 0), int node_id = -1); // serialize _wrapper to protobuf Status serialize(PMergeFilterRequest* request, void** data, int* len); @@ -202,7 +203,7 @@ public: // for ut bool is_ignored() { return _is_ignored; } - void set_ignored_msg(std::string &msg) { _ignored_msg = msg; } + void set_ignored_msg(std::string& msg) { _ignored_msg = msg; } // for ut bool is_bloomfilter(); diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index 5326df4139..80716936e4 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -53,7 +53,7 @@ public: consumer_filter->signal(); }; - auto ignore_remote_filter = [](IRuntimeFilter* runtime_filter, std::string &msg) { + auto ignore_remote_filter = [](IRuntimeFilter* runtime_filter, std::string& msg) { runtime_filter->set_ignored(); runtime_filter->set_ignored_msg(msg); runtime_filter->publish(); @@ -78,7 +78,8 @@ public: return d1.type < d2.type; } }; - std::sort(sorted_runtime_filter_descs.begin(), sorted_runtime_filter_descs.end(), compare_desc); + std::sort(sorted_runtime_filter_descs.begin(), sorted_runtime_filter_descs.end(), + compare_desc); for (auto& filter_desc : sorted_runtime_filter_descs) { IRuntimeFilter* runtime_filter = nullptr; @@ -94,7 +95,8 @@ public: bool is_in_filter = (runtime_filter->type() == RuntimeFilterType::IN_FILTER); - if (over_max_in_num && runtime_filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { + if (over_max_in_num && + runtime_filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { runtime_filter->change_to_bloom_filter(); } @@ -110,25 +112,28 @@ public: bool exists_in_filter = has_in_filter[runtime_filter->expr_order()]; if (is_in_filter && over_max_in_num) { VLOG_DEBUG << "fragment instance " << print_id(state->fragment_instance_id()) - << " ignore runtime filter(in filter id " << filter_desc.filter_id - << ") because: in_num(" << hash_table_size - << ") >= max_in_num(" << max_in_num << ")"; + << " ignore runtime filter(in filter id " << filter_desc.filter_id + << ") because: in_num(" << hash_table_size << ") >= max_in_num(" + << max_in_num << ")"; ignore_local_filter(filter_desc.filter_id); continue; } else if (!is_in_filter && exists_in_filter) { // do not create 'bloom filter' and 'minmax filter' when 'in filter' has created // because in filter is exactly filter, so it is enough to filter data VLOG_DEBUG << "fragment instance " << print_id(state->fragment_instance_id()) - << " ignore runtime filter(" << to_string(runtime_filter->type()) - << " id " << filter_desc.filter_id - << ") because: already exists in filter"; + << " ignore runtime filter(" << to_string(runtime_filter->type()) + << " id " << filter_desc.filter_id + << ") because: already exists in filter"; ignore_local_filter(filter_desc.filter_id); continue; } } else if (is_in_filter && over_max_in_num) { #ifdef VLOG_DEBUG_IS_ON - std::string msg = fmt::format("fragment instance {} ignore runtime filter(in filter id {}) because: in_num({}) >= max_in_num({})", - print_id(state->fragment_instance_id()), filter_desc.filter_id, hash_table_size, max_in_num); + std::string msg = fmt::format( + "fragment instance {} ignore runtime filter(in filter id {}) because: " + "in_num({}) >= max_in_num({})", + print_id(state->fragment_instance_id()), filter_desc.filter_id, + hash_table_size, max_in_num); ignore_remote_filter(runtime_filter, msg); #else ignore_remote_filter(runtime_filter, "ignored"); @@ -136,8 +141,9 @@ public: continue; } - if ((runtime_filter->type() == RuntimeFilterType::IN_FILTER) - || (runtime_filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER && !over_max_in_num)) { + if ((runtime_filter->type() == RuntimeFilterType::IN_FILTER) || + (runtime_filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER && + !over_max_in_num)) { has_in_filter[runtime_filter->expr_order()] = true; } _runtime_filters[runtime_filter->expr_order()].push_back(runtime_filter); diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp index 7e325eaafe..f4c28dcbd8 100644 --- a/be/src/exprs/slot_ref.cpp +++ b/be/src/exprs/slot_ref.cpp @@ -66,7 +66,9 @@ Status SlotRef::prepare(const SlotDescriptor* slot_desc, const RowDescriptor& ro } _tuple_idx = row_desc.get_tuple_idx(slot_desc->parent()); if (_tuple_idx == RowDescriptor::INVALID_IDX) { - return Status::InternalError(strings::Substitute("failed to get tuple idx with tuple id: $0, slot id: $1", slot_desc->parent(), _slot_id)); + return Status::InternalError( + strings::Substitute("failed to get tuple idx with tuple id: $0, slot id: $1", + slot_desc->parent(), _slot_id)); } _tuple_is_nullable = row_desc.tuple_is_nullable(_tuple_idx); _slot_offset = slot_desc->tuple_offset(); @@ -98,7 +100,9 @@ Status SlotRef::prepare(RuntimeState* state, const RowDescriptor& row_desc, Expr // TODO(marcel): get from runtime state _tuple_idx = row_desc.get_tuple_idx(slot_desc->parent()); if (_tuple_idx == RowDescriptor::INVALID_IDX) { - return Status::InternalError(strings::Substitute("failed to get tuple idx when prepare with tuple id: $0, slot id: $1", slot_desc->parent(), _slot_id)); + return Status::InternalError(strings::Substitute( + "failed to get tuple idx when prepare with tuple id: $0, slot id: $1", + slot_desc->parent(), _slot_id)); } DCHECK(_tuple_idx != RowDescriptor::INVALID_IDX); _tuple_is_nullable = row_desc.tuple_is_nullable(_tuple_idx); diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 70f51026c4..c976dbb243 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -492,7 +492,7 @@ bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::str // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. re2::RE2* StringFunctions::compile_regex(const StringVal& pattern, std::string* error_str, - const StringVal& match_parameter) { + const StringVal& match_parameter) { re2::StringPiece pattern_sp(reinterpret_cast(pattern.ptr), pattern.len); re2::RE2::Options options; // Disable error logging in case e.g. every row causes an error @@ -515,7 +515,7 @@ re2::RE2* StringFunctions::compile_regex(const StringVal& pattern, std::string* return nullptr; } return re; -} +} void StringFunctions::regexp_prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) { diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 3fdfa7b907..ffe4b0a9c7 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -189,7 +189,7 @@ public: const doris_udf::StringVal& str); // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str, - const StringVal& match_parameter); + const StringVal& match_parameter); }; } // namespace doris diff --git a/be/src/exprs/table_function/explode_json_array.cpp b/be/src/exprs/table_function/explode_json_array.cpp index fa8cf210e8..5158157e5d 100644 --- a/be/src/exprs/table_function/explode_json_array.cpp +++ b/be/src/exprs/table_function/explode_json_array.cpp @@ -28,127 +28,126 @@ std::string ParsedData::false_value = "false"; int ParsedData::set_output(ExplodeJsonArrayType type, rapidjson::Document& document) { int size = document.GetArray().Size(); switch (type) { - case ExplodeJsonArrayType::INT: { - _data.resize(size); - _backup_int.resize(size); - int i = 0; - for (auto& v : document.GetArray()) { - if (v.IsInt64()) { - _backup_int[i] = v.GetInt64(); - _data[i] = &_backup_int[i]; + case ExplodeJsonArrayType::INT: { + _data.resize(size); + _backup_int.resize(size); + int i = 0; + for (auto& v : document.GetArray()) { + if (v.IsInt64()) { + _backup_int[i] = v.GetInt64(); + _data[i] = &_backup_int[i]; + } else { + _data[i] = nullptr; + } + ++i; + } + break; + } + case ExplodeJsonArrayType::DOUBLE: { + _data.resize(size); + _backup_double.resize(size); + int i = 0; + for (auto& v : document.GetArray()) { + if (v.IsDouble()) { + _backup_double[i] = v.GetDouble(); + _data[i] = &_backup_double[i]; + } else { + _data[i] = nullptr; + } + ++i; + } + break; + } + case ExplodeJsonArrayType::STRING: { + _data_string.clear(); + _backup_string.clear(); + _string_nulls.clear(); + int32_t wbytes = 0; + int i = 0; + for (auto& v : document.GetArray()) { + switch (v.GetType()) { + case rapidjson::Type::kStringType: + _backup_string.emplace_back(v.GetString(), v.GetStringLength()); + _string_nulls.push_back(false); + // do not set _data_string here. + // Because the address of the string stored in `_backup_string` may + // change each time `emplace_back()` is called. + break; + case rapidjson::Type::kNumberType: + if (v.IsUint()) { + wbytes = sprintf(tmp_buf, "%u", v.GetUint()); + } else if (v.IsInt()) { + wbytes = sprintf(tmp_buf, "%d", v.GetInt()); + } else if (v.IsUint64()) { + wbytes = sprintf(tmp_buf, "%lu", v.GetUint64()); + } else if (v.IsInt64()) { + wbytes = sprintf(tmp_buf, "%ld", v.GetInt64()); } else { - _data[i] = nullptr; + wbytes = sprintf(tmp_buf, "%f", v.GetDouble()); } - ++i; + _backup_string.emplace_back(tmp_buf, wbytes); + _string_nulls.push_back(false); + // do not set _data_string here. + // Because the address of the string stored in `_backup_string` may + // change each time `emplace_back()` is called. + break; + case rapidjson::Type::kFalseType: + _data_string.emplace_back(true_value); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kTrueType: + _data_string.emplace_back(false_value); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kNullType: + _data_string.push_back({}); + _string_nulls.push_back(true); + break; + default: + _data_string.push_back({}); + _string_nulls.push_back(true); + break; } - break; + ++i; } - case ExplodeJsonArrayType::DOUBLE: { - _data.resize(size); - _backup_double.resize(size); - int i = 0; - for (auto& v : document.GetArray()) { - if (v.IsDouble()) { - _backup_double[i] = v.GetDouble(); - _data[i] = &_backup_double[i]; - } else { - _data[i] = nullptr; - } - ++i; - } - break; + // Must set _data_string at the end, so that we can + // save the real addr of string in `_backup_string` to `_data_string`. + for (auto& str : _backup_string) { + _data_string.emplace_back(str); } - case ExplodeJsonArrayType::STRING: { - _data_string.clear(); - _backup_string.clear(); - _string_nulls.clear(); - int32_t wbytes = 0; - int i = 0; - for (auto& v : document.GetArray()) { - switch (v.GetType()) { - case rapidjson::Type::kStringType: - _backup_string.emplace_back(v.GetString(), v.GetStringLength()); - _string_nulls.push_back(false); - // do not set _data_string here. - // Because the address of the string stored in `_backup_string` may - // change each time `emplace_back()` is called. - break; - case rapidjson::Type::kNumberType: - if (v.IsUint()) { - wbytes = sprintf(tmp_buf, "%u", v.GetUint()); - } else if (v.IsInt()) { - wbytes = sprintf(tmp_buf, "%d", v.GetInt()); - } else if (v.IsUint64()) { - wbytes = sprintf(tmp_buf, "%lu", v.GetUint64()); - } else if (v.IsInt64()) { - wbytes = sprintf(tmp_buf, "%ld", v.GetInt64()); - } else { - wbytes = sprintf(tmp_buf, "%f", v.GetDouble()); - } - _backup_string.emplace_back(tmp_buf, wbytes); - _string_nulls.push_back(false); - // do not set _data_string here. - // Because the address of the string stored in `_backup_string` may - // change each time `emplace_back()` is called. - break; - case rapidjson::Type::kFalseType: - _data_string.emplace_back(true_value); - _string_nulls.push_back(false); - break; - case rapidjson::Type::kTrueType: - _data_string.emplace_back(false_value); - _string_nulls.push_back(false); - break; - case rapidjson::Type::kNullType: - _data_string.push_back({}); - _string_nulls.push_back(true); - break; - default: - _data_string.push_back({}); - _string_nulls.push_back(true); - break; - } - ++i; - } - // Must set _data_string at the end, so that we can - // save the real addr of string in `_backup_string` to `_data_string`. - for (auto& str : _backup_string) { - _data_string.emplace_back(str); - } - break; - } - default: - CHECK(false) << type; - break; + break; + } + default: + CHECK(false) << type; + break; } return size; } ///////////////////////// ExplodeJsonArrayTableFunction::ExplodeJsonArrayTableFunction(ExplodeJsonArrayType type) - : _type(type) { - + : _type(type) { switch (type) { - case ExplodeJsonArrayType::INT: - _fn_name = "explode_json_array_int"; - break; - case ExplodeJsonArrayType::DOUBLE: - _fn_name = "explode_json_array_double"; - break; - case ExplodeJsonArrayType::STRING: - _fn_name = "explode_json_array_string"; - break; - default: - _fn_name = "unknown"; - break; + case ExplodeJsonArrayType::INT: + _fn_name = "explode_json_array_int"; + break; + case ExplodeJsonArrayType::DOUBLE: + _fn_name = "explode_json_array_double"; + break; + case ExplodeJsonArrayType::STRING: + _fn_name = "explode_json_array_string"; + break; + default: + _fn_name = "unknown"; + break; } } -ExplodeJsonArrayTableFunction::~ExplodeJsonArrayTableFunction() { -} +ExplodeJsonArrayTableFunction::~ExplodeJsonArrayTableFunction() {} Status ExplodeJsonArrayTableFunction::process(TupleRow* tuple_row) { - CHECK(1 == _expr_context->root()->get_num_children()) << _expr_context->root()->get_num_children(); + CHECK(1 == _expr_context->root()->get_num_children()) + << _expr_context->root()->get_num_children(); _is_current_empty = false; _eos = false; @@ -157,8 +156,9 @@ Status ExplodeJsonArrayTableFunction::process(TupleRow* tuple_row) { _is_current_empty = true; } else { rapidjson::Document document; - document.Parse((char*) text.ptr, text.len); - if (UNLIKELY(document.HasParseError()) || !document.IsArray() || document.GetArray().Size() == 0) { + document.Parse((char*)text.ptr, text.len); + if (UNLIKELY(document.HasParseError()) || !document.IsArray() || + document.GetArray().Size() == 0) { _is_current_empty = true; } else { _cur_size = _parsed_data.set_output(_type, document); diff --git a/be/src/exprs/table_function/explode_json_array.h b/be/src/exprs/table_function/explode_json_array.h index 616535ea12..d98c56d929 100644 --- a/be/src/exprs/table_function/explode_json_array.h +++ b/be/src/exprs/table_function/explode_json_array.h @@ -87,9 +87,10 @@ struct ParsedData { *output = _data[offset]; break; case ExplodeJsonArrayType::STRING: - *output = _string_nulls[offset] ? nullptr - : real ? reinterpret_cast(_backup_string[offset].data()) - : &_data_string[offset]; + *output = _string_nulls[offset] + ? nullptr + : real ? reinterpret_cast(_backup_string[offset].data()) + : &_data_string[offset]; break; default: CHECK(false) << type; diff --git a/be/src/exprs/topn_function.h b/be/src/exprs/topn_function.h index b50700e8d0..78eed2ec8e 100644 --- a/be/src/exprs/topn_function.h +++ b/be/src/exprs/topn_function.h @@ -32,16 +32,16 @@ public: static void topn_update(FunctionContext*, const T& src, const IntVal& topn, StringVal* dst); template - static void topn_update(FunctionContext*, const T& src, const IntVal& topn, const IntVal& space_expand_rate, - StringVal* dst); + static void topn_update(FunctionContext*, const T& src, const IntVal& topn, + const IntVal& space_expand_rate, StringVal* dst); - static void topn_merge(FunctionContext*,const StringVal& src, StringVal* dst); + static void topn_merge(FunctionContext*, const StringVal& src, StringVal* dst); static StringVal topn_serialize(FunctionContext* ctx, const StringVal& src); static StringVal topn_finalize(FunctionContext*, const StringVal& src); }; -} +} // namespace doris #endif //DORIS_BE_SRC_EXPRS_TOPN_FUNCTION_H diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index 6c90ff06c1..5501b774f1 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -52,8 +52,7 @@ Status CompactionAction::_check_param(HttpRequest* req, uint64_t* tablet_id) { try { *tablet_id = std::stoull(req_tablet_id); } catch (const std::exception& e) { - return Status::InternalError( - strings::Substitute("convert tablet_id failed, $0", e.what())); + return Status::InternalError(strings::Substitute("convert tablet_id failed, $0", e.what())); } return Status::OK(); @@ -62,14 +61,11 @@ Status CompactionAction::_check_param(HttpRequest* req, uint64_t* tablet_id) { // for viewing the compaction status Status CompactionAction::_handle_show_compaction(HttpRequest* req, std::string* json_result) { uint64_t tablet_id = 0; - RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), - "check param failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed"); - TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); if (tablet == nullptr) { - return Status::NotFound( - strings::Substitute("Tablet not found. tablet_id=$0", tablet_id)); + return Status::NotFound(strings::Substitute("Tablet not found. tablet_id=$0", tablet_id)); } tablet->get_compaction_status(json_result); @@ -80,23 +76,20 @@ Status CompactionAction::_handle_run_compaction(HttpRequest* req, std::string* j // 1. param check // check req_tablet_id is not empty uint64_t tablet_id = 0; - RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), - "check param failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed"); // check compaction_type equals 'base' or 'cumulative' std::string compaction_type = req->param(PARAM_COMPACTION_TYPE); if (compaction_type != PARAM_COMPACTION_BASE && compaction_type != PARAM_COMPACTION_CUMULATIVE) { return Status::NotSupported( - strings::Substitute("The compaction type '$0' is not supported", compaction_type)); + strings::Substitute("The compaction type '$0' is not supported", compaction_type)); } // 2. fetch the tablet by tablet_id - TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); if (tablet == nullptr) { - return Status::NotFound( - strings::Substitute("Tablet not found. tablet_id=$0", tablet_id)); + return Status::NotFound(strings::Substitute("Tablet not found. tablet_id=$0", tablet_id)); } // 3. execute compaction task @@ -141,7 +134,7 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::st uint64_t tablet_id = 0; // check req_tablet_id is not empty - RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id),"check param failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed"); if (tablet_id == 0) { // overall compaction status @@ -152,10 +145,9 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::st TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); if (tablet == nullptr) { LOG(WARNING) << "invalid argument.tablet_id:" << tablet_id; - return Status::InternalError( - strings::Substitute("fail to get $0", tablet_id)); + return Status::InternalError(strings::Substitute("fail to get $0", tablet_id)); } - + std::string json_template = R"({ "status" : "Success", "run_status" : $0, @@ -163,14 +155,15 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::st "tablet_id" : $2, "compact_type" : "$3" })"; - + std::string msg = "compaction task for this tablet is not running"; std::string compaction_type = ""; bool run_status = 0; - + { // use try lock to check this tablet is running cumulative compaction - std::unique_lock lock_cumulative(tablet->get_cumulative_compaction_lock(), std::try_to_lock); + std::unique_lock lock_cumulative(tablet->get_cumulative_compaction_lock(), + std::try_to_lock); if (!lock_cumulative.owns_lock()) { msg = "compaction task for this tablet is running"; compaction_type = "cumulative"; @@ -180,30 +173,34 @@ Status CompactionAction::_handle_run_status_compaction(HttpRequest* req, std::st return Status::OK(); } } - + { // use try lock to check this tablet is running base compaction - std::unique_lock lock_base(tablet->get_base_compaction_lock(), std::try_to_lock); + std::unique_lock lock_base(tablet->get_base_compaction_lock(), + std::try_to_lock); if (!lock_base.owns_lock()) { msg = "compaction task for this tablet is running"; compaction_type = "base"; run_status = 1; - *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, compaction_type); + *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, + compaction_type); return Status::OK(); } } // not running any compaction - *json_result = strings::Substitute(json_template, run_status, msg, tablet_id, compaction_type); + *json_result = + strings::Substitute(json_template, run_status, msg, tablet_id, compaction_type); return Status::OK(); } } Status CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet, - const std::string& compaction_type) { + const std::string& compaction_type) { std::shared_ptr cumulative_compaction_policy = _create_cumulative_compaction_policy(); if (tablet->get_cumulative_compaction_policy() == nullptr || - tablet->get_cumulative_compaction_policy()->name() != cumulative_compaction_policy->name()) { + tablet->get_cumulative_compaction_policy()->name() != + cumulative_compaction_policy->name()) { tablet->set_cumulative_compaction_policy(cumulative_compaction_policy); } @@ -275,7 +272,8 @@ void CompactionAction::handle(HttpRequest* req) { } } -std::shared_ptr CompactionAction::_create_cumulative_compaction_policy() { +std::shared_ptr +CompactionAction::_create_cumulative_compaction_policy() { std::string current_policy = ""; { std::lock_guard lock(*config::get_mutable_string_config_lock()); diff --git a/be/src/http/action/compaction_action.h b/be/src/http/action/compaction_action.h index f4cb792fab..08dfb0ce70 100644 --- a/be/src/http/action/compaction_action.h +++ b/be/src/http/action/compaction_action.h @@ -58,8 +58,7 @@ private: Status _handle_run_compaction(HttpRequest* req, std::string* json_result); /// thread callback function for the tablet to do compaction - Status _execute_compaction_callback(TabletSharedPtr tablet, - const std::string& compaction_type); + Status _execute_compaction_callback(TabletSharedPtr tablet, const std::string& compaction_type); /// fetch compaction running status Status _handle_run_status_compaction(HttpRequest* req, std::string* json_result); diff --git a/be/src/http/action/config_action.cpp b/be/src/http/action/config_action.cpp index e4dab39820..9c95276db3 100644 --- a/be/src/http/action/config_action.cpp +++ b/be/src/http/action/config_action.cpp @@ -60,7 +60,7 @@ void ConfigAction::handle_show_config(HttpRequest* req) { for (const std::string& config_filed : _config) { writer.String(config_filed.c_str()); } - writer.EndArray(); + writer.EndArray(); } writer.EndArray(); diff --git a/be/src/http/action/config_action.h b/be/src/http/action/config_action.h index 27836e62c1..ca20f3862f 100644 --- a/be/src/http/action/config_action.h +++ b/be/src/http/action/config_action.h @@ -29,7 +29,7 @@ enum ConfigActionType { // Update BE config. class ConfigAction : public HttpHandler { public: - ConfigAction(ConfigActionType type): _type(type) {} + ConfigAction(ConfigActionType type) : _type(type) {} virtual ~ConfigAction() {} @@ -41,7 +41,6 @@ private: void handle_update_config(HttpRequest* req); void handle_show_config(HttpRequest* req); - }; } // namespace doris diff --git a/be/src/http/action/stream_load.cpp b/be/src/http/action/stream_load.cpp index 11c8f4a51e..d0a1693440 100644 --- a/be/src/http/action/stream_load.cpp +++ b/be/src/http/action/stream_load.cpp @@ -263,7 +263,8 @@ Status StreamLoadAction::_on_header(HttpRequest* http_req, StreamLoadContext* ct } // get format of this put - if (!http_req->header(HTTP_COMPRESS_TYPE).empty() && boost::iequals(http_req->header(HTTP_FORMAT_KEY), "JSON")) { + if (!http_req->header(HTTP_COMPRESS_TYPE).empty() && + boost::iequals(http_req->header(HTTP_FORMAT_KEY), "JSON")) { return Status::InternalError("compress data of JSON format is not supported."); } std::string format_str = http_req->header(HTTP_FORMAT_KEY); @@ -353,8 +354,8 @@ void StreamLoadAction::on_chunk_data(HttpRequest* req) { bb->flip(); auto st = ctx->body_sink->append(bb); if (!st.ok()) { - LOG(WARNING) << "append body content failed. errmsg=" << st.get_error_msg() - << ", " << ctx->brief(); + LOG(WARNING) << "append body content failed. errmsg=" << st.get_error_msg() << ", " + << ctx->brief(); ctx->status = st; return; } @@ -508,7 +509,8 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext* if (!http_req->header(HTTP_SEND_BATCH_PARALLELISM).empty()) { try { - request.__set_send_batch_parallelism(std::stoi(http_req->header(HTTP_SEND_BATCH_PARALLELISM))); + request.__set_send_batch_parallelism( + std::stoi(http_req->header(HTTP_SEND_BATCH_PARALLELISM))); } catch (const std::invalid_argument& e) { return Status::InvalidArgument("Invalid send_batch_parallelism format"); } @@ -603,10 +605,12 @@ Status StreamLoadAction::_data_saved_path(HttpRequest* req, std::string* file_pa void StreamLoadAction::_sava_stream_load_record(StreamLoadContext* ctx, const std::string& str) { auto stream_load_recorder = StorageEngine::instance()->get_stream_load_recorder(); if (stream_load_recorder != nullptr) { - std::string key = std::to_string(ctx->start_millis + ctx->load_cost_millis) + "_" + ctx->label; + std::string key = + std::to_string(ctx->start_millis + ctx->load_cost_millis) + "_" + ctx->label; auto st = stream_load_recorder->put(key, str); if (st.ok()) { - LOG(INFO) << "put stream_load_record rocksdb successfully. label: " << ctx->label << ", key: " << key; + LOG(INFO) << "put stream_load_record rocksdb successfully. label: " << ctx->label + << ", key: " << key; } } else { LOG(WARNING) << "put stream_load_record rocksdb failed. stream_load_recorder is null."; diff --git a/be/src/http/action/stream_load_2pc.cpp b/be/src/http/action/stream_load_2pc.cpp index 511bce9015..cd71bb5e95 100644 --- a/be/src/http/action/stream_load_2pc.cpp +++ b/be/src/http/action/stream_load_2pc.cpp @@ -73,7 +73,7 @@ void StreamLoad2PCAction::handle(HttpRequest* req) { status = Status::InternalError("no valid Basic authorization"); } - status =_exec_env->stream_load_executor()->operate_txn_2pc(ctx); + status = _exec_env->stream_load_executor()->operate_txn_2pc(ctx); if (!status.ok()) { status_result = to_json(status); @@ -83,7 +83,8 @@ void StreamLoad2PCAction::handle(HttpRequest* req) { HttpChannel::send_reply(req, HttpStatus::OK, status_result); } -std::string StreamLoad2PCAction::get_success_info(const std::string txn_id, const std::string txn_operation) { +std::string StreamLoad2PCAction::get_success_info(const std::string txn_id, + const std::string txn_operation) { rapidjson::StringBuffer s; rapidjson::PrettyWriter writer(s); diff --git a/be/src/http/action/stream_load_2pc.h b/be/src/http/action/stream_load_2pc.h index 7043b526ef..63791e2e83 100644 --- a/be/src/http/action/stream_load_2pc.h +++ b/be/src/http/action/stream_load_2pc.h @@ -28,7 +28,7 @@ class StreamLoad2PCAction : public HttpHandler { public: StreamLoad2PCAction(ExecEnv* exec_env); - virtual ~StreamLoad2PCAction(){}; + virtual ~StreamLoad2PCAction() {}; void handle(HttpRequest* req) override; std::string get_success_info(const std::string txn_id, const std::string txn_operation); diff --git a/be/src/http/action/tablets_distribution_action.h b/be/src/http/action/tablets_distribution_action.h index 42582c07d5..4cbc25e9ec 100644 --- a/be/src/http/action/tablets_distribution_action.h +++ b/be/src/http/action/tablets_distribution_action.h @@ -27,7 +27,7 @@ namespace doris { class TabletsDistributionAction : public HttpHandler { public: TabletsDistributionAction(); - void handle(HttpRequest *req) override; + void handle(HttpRequest* req) override; EasyJson get_tablets_distribution_group_by_partition(uint64_t partition_id); std::string host() { return _host; } @@ -35,4 +35,3 @@ private: std::string _host; }; } // namespace doris - diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 0495c6f5b7..57c988c68c 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -153,12 +153,13 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr limit_str = tracker->limit() == -1 ? "none" : AccurateItoaKMGT(tracker->limit()); current_consumption_str = AccurateItoaKMGT(tracker->consumption()); peak_consumption_str = AccurateItoaKMGT(tracker->peak_consumption()); - + int64_t use_count = tracker.use_count(); (*output) << strings::Substitute( - "$0$1$2" // id, parent, limit - "$3$4$5\n", // current, peak - tracker->label(), parent, limit_str, current_consumption_str, peak_consumption_str, use_count); + "$0$1$2" // id, parent, limit + "$3$4$5\n", // current, peak + tracker->label(), parent, limit_str, current_consumption_str, peak_consumption_str, + use_count); } (*output) << "\n"; } diff --git a/be/src/http/ev_http_server.h b/be/src/http/ev_http_server.h index af25d5883b..f5ca94760a 100644 --- a/be/src/http/ev_http_server.h +++ b/be/src/http/ev_http_server.h @@ -67,7 +67,7 @@ private: int _server_fd = -1; std::unique_ptr _workers; - std::mutex _event_bases_lock; // protect _event_bases + std::mutex _event_bases_lock; // protect _event_bases std::vector> _event_bases; std::mutex _handler_lock; diff --git a/be/src/http/http_client.h b/be/src/http/http_client.h index 3f3028e584..c03275aa3e 100644 --- a/be/src/http/http_client.h +++ b/be/src/http/http_client.h @@ -105,7 +105,8 @@ public: *length = cl; return Status::OK(); } - return Status::InternalError(fmt::format("failed to get content length. err code: {}", code)); + return Status::InternalError( + fmt::format("failed to get content length. err code: {}", code)); } long get_http_status() const { diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index ade087deb1..b8b65312d2 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -47,7 +47,7 @@ public: // will last util finalize function is called. Memory allocated from heap should // be freed in finalize function to avoid memory leak. void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, - ObjectPool* agg_pool) const { + ObjectPool* agg_pool) const { _init_fn(dst, src, src_null, mem_pool, agg_pool); } @@ -70,9 +70,7 @@ public: // Memory Note: All heap memory allocated in init and update function should be freed // before this function return. Memory allocated from *mem_pool will be still available // and will be freed by client. - void finalize(RowCursorCell* src, MemPool* mem_pool) const { - _finalize_fn(src, mem_pool); - } + void finalize(RowCursorCell* src, MemPool* mem_pool) const { _finalize_fn(src, mem_pool); } FieldAggregationMethod agg_method() const { return _agg_method; } diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index 8e988c99c6..948ec425d3 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -139,7 +139,8 @@ Status BaseCompaction::pick_rowsets_to_compact() { int64_t interval_since_last_base_compaction = time(nullptr) - base_creation_time; if (interval_since_last_base_compaction > interval_threshold) { VLOG_NOTICE << "satisfy the base compaction policy. tablet=" << _tablet->full_name() - << ", interval_since_last_base_compaction=" << interval_since_last_base_compaction + << ", interval_since_last_base_compaction=" + << interval_since_last_base_compaction << ", interval_threshold=" << interval_threshold; return Status::OK(); } diff --git a/be/src/olap/base_compaction.h b/be/src/olap/base_compaction.h index 65b05f8bfa..21a6e24bc3 100644 --- a/be/src/olap/base_compaction.h +++ b/be/src/olap/base_compaction.h @@ -51,4 +51,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 0de5b0434d..72970cfb2c 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -29,7 +29,8 @@ extern MetricPrototype METRIC_query_scan_bytes; extern MetricPrototype METRIC_query_scan_rows; extern MetricPrototype METRIC_query_scan_count; -BaseTablet::BaseTablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, DataDir* data_dir) +BaseTablet::BaseTablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, + DataDir* data_dir) : _state(tablet_meta->tablet_state()), _tablet_meta(tablet_meta), _storage_param(storage_param), @@ -68,17 +69,21 @@ void BaseTablet::_gen_tablet_path() { if (_data_dir != nullptr && _tablet_meta != nullptr) { FilePathDesc root_path_desc; root_path_desc.filepath = _data_dir->path_desc().filepath; - root_path_desc.storage_medium = fs::fs_util::get_t_storage_medium(_storage_param.storage_medium()); + root_path_desc.storage_medium = + fs::fs_util::get_t_storage_medium(_storage_param.storage_medium()); if (_data_dir->is_remote()) { root_path_desc.storage_name = _storage_param.storage_name(); - root_path_desc.remote_path = StorageBackendMgr::get_root_path_from_param(_storage_param); + root_path_desc.remote_path = + StorageBackendMgr::get_root_path_from_param(_storage_param); } FilePathDescStream desc_s; desc_s << root_path_desc << DATA_PREFIX; FilePathDesc path_desc = path_util::join_path_desc_segments( desc_s.path_desc(), std::to_string(_tablet_meta->shard_id())); - path_desc = path_util::join_path_desc_segments(path_desc, std::to_string(_tablet_meta->tablet_id())); - _tablet_path_desc = path_util::join_path_desc_segments(path_desc, std::to_string(_tablet_meta->schema_hash())); + path_desc = path_util::join_path_desc_segments(path_desc, + std::to_string(_tablet_meta->tablet_id())); + _tablet_path_desc = path_util::join_path_desc_segments( + path_desc, std::to_string(_tablet_meta->schema_hash())); if (_tablet_path_desc.is_remote()) { _tablet_path_desc.remote_path += "/" + _tablet_meta->tablet_uid().to_string(); } diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 2e3d2cfcf4..20fe1a5f16 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -33,7 +33,8 @@ class DataDir; // storage engine evolves. class BaseTablet : public std::enable_shared_from_this { public: - BaseTablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, DataDir* data_dir); + BaseTablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, + DataDir* data_dir); virtual ~BaseTablet(); DataDir* data_dir() const; @@ -149,4 +150,3 @@ inline const TabletSchema& BaseTablet::tablet_schema() const { } } /* namespace doris */ - diff --git a/be/src/olap/block_column_predicate.cpp b/be/src/olap/block_column_predicate.cpp index 725ebda470..5fce8beebe 100644 --- a/be/src/olap/block_column_predicate.cpp +++ b/be/src/olap/block_column_predicate.cpp @@ -27,37 +27,43 @@ void SingleColumnBlockPredicate::evaluate(RowBlockV2* block, uint16_t* selected_ _predicate->evaluate(&column_block, block->selection_vector(), selected_size); } -void SingleColumnBlockPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_size, bool *flags) const { +void SingleColumnBlockPredicate::evaluate_and(RowBlockV2* block, uint16_t selected_size, + bool* flags) const { auto column_id = _predicate->column_id(); auto column_block = block->column_block(column_id); _predicate->evaluate_and(&column_block, block->selection_vector(), selected_size, flags); } -void SingleColumnBlockPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_size, bool *flags) const { +void SingleColumnBlockPredicate::evaluate_or(RowBlockV2* block, uint16_t selected_size, + bool* flags) const { auto column_id = _predicate->column_id(); auto column_block = block->column_block(column_id); _predicate->evaluate_or(&column_block, block->selection_vector(), selected_size, flags); } -void SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const { +void SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const { auto column_id = _predicate->column_id(); auto& column = block[column_id]; _predicate->evaluate(*column, sel, selected_size); } - -void SingleColumnBlockPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const { + +void SingleColumnBlockPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const { auto column_id = _predicate->column_id(); auto& column = block[column_id]; _predicate->evaluate_and(*column, sel, selected_size, flags); } - -void SingleColumnBlockPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const { + +void SingleColumnBlockPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const { auto column_id = _predicate->column_id(); auto& column = block[column_id]; _predicate->evaluate_or(*column, sel, selected_size, flags); } - -void SingleColumnBlockPredicate::evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const { + +void SingleColumnBlockPredicate::evaluate_vec(vectorized::MutableColumns& block, uint16_t size, + bool* flags) const { auto column_id = _predicate->column_id(); auto& column = block[column_id]; _predicate->evaluate_vec(*column, size, flags); @@ -84,7 +90,8 @@ void OrBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t* selected_size } } -void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const { +void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const { if (num_of_column_predicate() == 1) { _block_column_predicate_vec[0]->evaluate(block, sel, selected_size); } else { @@ -94,7 +101,7 @@ void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_ auto column_predicate = _block_column_predicate_vec[i]; column_predicate->evaluate_or(block, sel, *selected_size, ret_flags); } - + uint16_t new_size = 0; for (int i = 0; i < *selected_size; ++i) { if (ret_flags[i]) { @@ -105,19 +112,22 @@ void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_ } } -void OrBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_size, bool* flags) const { +void OrBlockColumnPredicate::evaluate_or(RowBlockV2* block, uint16_t selected_size, + bool* flags) const { for (auto block_column_predicate : _block_column_predicate_vec) { block_column_predicate->evaluate_or(block, selected_size, flags); } } -void OrBlockColumnPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const { +void OrBlockColumnPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const { for (auto block_column_predicate : _block_column_predicate_vec) { block_column_predicate->evaluate_or(block, sel, selected_size, flags); } } -void OrBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_size, bool* flags) const { +void OrBlockColumnPredicate::evaluate_and(RowBlockV2* block, uint16_t selected_size, + bool* flags) const { if (num_of_column_predicate() == 1) { _block_column_predicate_vec[0]->evaluate_and(block, selected_size, flags); } else { @@ -134,7 +144,8 @@ void OrBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_s } } -void OrBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const { +void OrBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const { if (num_of_column_predicate() == 1) { _block_column_predicate_vec[0]->evaluate_and(block, sel, selected_size, flags); } else { @@ -144,7 +155,7 @@ void OrBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uin auto column_predicate = _block_column_predicate_vec[i]; column_predicate->evaluate_or(block, sel, selected_size, ret_flags); } - + for (int i = 0; i < selected_size; ++i) { flags[i] &= ret_flags[i]; } @@ -157,25 +168,29 @@ void AndBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t* selected_siz } } -void AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const { +void AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const { for (auto block_column_predicate : _block_column_predicate_vec) { block_column_predicate->evaluate(block, sel, selected_size); } } -void AndBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_size, bool* flags) const { +void AndBlockColumnPredicate::evaluate_and(RowBlockV2* block, uint16_t selected_size, + bool* flags) const { for (auto block_column_predicate : _block_column_predicate_vec) { block_column_predicate->evaluate_and(block, selected_size, flags); } } -void AndBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const { +void AndBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const { for (auto block_column_predicate : _block_column_predicate_vec) { block_column_predicate->evaluate_and(block, sel, selected_size, flags); } } -void AndBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_size, bool* flags) const { +void AndBlockColumnPredicate::evaluate_or(RowBlockV2* block, uint16_t selected_size, + bool* flags) const { if (num_of_column_predicate() == 1) { _block_column_predicate_vec[0]->evaluate_or(block, selected_size, flags); } else { @@ -193,25 +208,27 @@ void AndBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_s } } -void AndBlockColumnPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const { +void AndBlockColumnPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const { if (num_of_column_predicate() == 1) { _block_column_predicate_vec[0]->evaluate_or(block, sel, selected_size, flags); } else { bool new_flags[selected_size]; memset(new_flags, true, selected_size); - + for (auto block_column_predicate : _block_column_predicate_vec) { block_column_predicate->evaluate_and(block, sel, selected_size, new_flags); } - + for (uint16_t i = 0; i < selected_size; i++) { flags[i] |= new_flags[i]; } } } - + // todo(wb) Can the 'and' of multiple bitmaps be vectorized? -void AndBlockColumnPredicate::evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const { +void AndBlockColumnPredicate::evaluate_vec(vectorized::MutableColumns& block, uint16_t size, + bool* flags) const { if (num_of_column_predicate() == 1) { _block_column_predicate_vec[0]->evaluate_vec(block, size, flags); } else { @@ -219,9 +236,9 @@ void AndBlockColumnPredicate::evaluate_vec(vectorized::MutableColumns& block, ui for (auto block_column_predicate : _block_column_predicate_vec) { memset(new_flags, true, size); block_column_predicate->evaluate_vec(block, size, new_flags); - + for (uint16_t j = 0; j < size; j++) { - flags[j] &= new_flags[j] ; + flags[j] &= new_flags[j]; } } } diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h index 1927c9477e..4bd4d7baf8 100644 --- a/be/src/olap/block_column_predicate.h +++ b/be/src/olap/block_column_predicate.h @@ -43,16 +43,20 @@ public: virtual void get_all_column_ids(std::set& column_id_set) const = 0; - virtual void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const {}; - virtual void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {}; - virtual void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {}; - - virtual void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const {}; + virtual void evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const {}; + virtual void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const {}; + virtual void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t selected_size, bool* flags) const {}; + + virtual void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, + bool* flags) const {}; }; class SingleColumnBlockPredicate : public BlockColumnPredicate { public: - explicit SingleColumnBlockPredicate(const ColumnPredicate* pre):_predicate(pre) {}; + explicit SingleColumnBlockPredicate(const ColumnPredicate* pre) : _predicate(pre) {}; void evaluate(RowBlockV2* block, uint16_t* selected_size) const override; void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags) const override; @@ -62,10 +66,13 @@ public: column_id_set.insert(_predicate->column_id()); }; - void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const override; - void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override; - void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override; - + void evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const override; + void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, + bool* flags) const override; + void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, + bool* flags) const override; + void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const override; private: @@ -86,9 +93,7 @@ public: _block_column_predicate_vec.push_back(column_predicate); } - size_t num_of_column_predicate() const { - return _block_column_predicate_vec.size(); - } + size_t num_of_column_predicate() const { return _block_column_predicate_vec.size(); } void get_all_column_ids(std::set& column_id_set) const override { for (auto child_block_predicate : _block_column_predicate_vec) { @@ -110,9 +115,12 @@ public: void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags) const override; void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) const override; - void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const override; - void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override; - void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override; + void evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const override; + void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, + bool* flags) const override; + void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, + bool* flags) const override; // note(wb) we didnt't impelment evaluate_vec method here, because storage layer only support AND predicate now; }; @@ -127,13 +135,14 @@ public: // 2.Evaluate OR SEMANTICS in flags use 1 result to get proper select flags void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) const override; - void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const override; - void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override; - void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override; + void evaluate(vectorized::MutableColumns& block, uint16_t* sel, + uint16_t* selected_size) const override; + void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, + bool* flags) const override; + void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, + bool* flags) const override; void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const override; - }; } //namespace doris - diff --git a/be/src/olap/bloom_filter.hpp b/be/src/olap/bloom_filter.hpp index 21662a5ce6..824806f8f5 100644 --- a/be/src/olap/bloom_filter.hpp +++ b/be/src/olap/bloom_filter.hpp @@ -34,8 +34,7 @@ static const uint64_t BLOOM_FILTER_NULL_HASHCODE = 2862933555777941757ULL; struct BloomFilterIndexHeader { uint64_t block_count; - BloomFilterIndexHeader() : - block_count(0) {} + BloomFilterIndexHeader() : block_count(0) {} } __attribute__((packed)); // Bare metal bit set implementation. For performance reasons, this implementation does not @@ -44,10 +43,8 @@ class BitSet { public: BitSet() : _data(nullptr), _data_len(0) {} - ~BitSet() { - SAFE_DELETE_ARRAY(_data); - } - + ~BitSet() { SAFE_DELETE_ARRAY(_data); } + // Init BitSet with given bit_num, which will align up to uint64_t bool init(uint32_t bit_num) { if (bit_num <= 0) { @@ -55,7 +52,7 @@ public: } _data_len = (bit_num + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); - _data = new(std::nothrow) uint64_t[_data_len]; + _data = new (std::nothrow) uint64_t[_data_len]; if (_data == nullptr) { return false; } @@ -72,14 +69,10 @@ public: } // Set the bit specified by param, note that uint64_t type contains 2^6 bits - void set(uint32_t index) { - _data[index >> 6] |= 1L << (index % 64); - } + void set(uint32_t index) { _data[index >> 6] |= 1L << (index % 64); } // Return true if the bit specified by param is set - bool get(uint32_t index) const { - return (_data[index >> 6] & (1L << (index % 64))) != 0; - } + bool get(uint32_t index) const { return (_data[index >> 6] & (1L << (index % 64))) != 0; } // Merge with another BitSet by byte, return false when the length is not equal bool merge(const BitSet& set) { @@ -107,21 +100,13 @@ public: return str; } - uint64_t* data() const { - return _data; - } + uint64_t* data() const { return _data; } - uint32_t data_len() const { - return _data_len; - } + uint32_t data_len() const { return _data_len; } - uint32_t bit_num() const { - return _data_len * sizeof(uint64_t) * 8; - } + uint32_t bit_num() const { return _data_len * sizeof(uint64_t) * 8; } - void clear() { - memset(_data, 0, _data_len * sizeof(uint64_t)); - } + void clear() { memset(_data, 0, _data_len * sizeof(uint64_t)); } void reset() { _data = NULL; @@ -130,7 +115,7 @@ public: private: uint64_t* _data; - uint32_t _data_len; + uint32_t _data_len; }; class BloomFilter { @@ -164,16 +149,16 @@ public: // Compute hash value of given buffer and add to BloomFilter void add_bytes(const char* buf, uint32_t len) { - uint64_t hash = buf == nullptr ? - BLOOM_FILTER_NULL_HASHCODE : HashUtil::hash64(buf, len, DEFAULT_SEED); + uint64_t hash = buf == nullptr ? BLOOM_FILTER_NULL_HASHCODE + : HashUtil::hash64(buf, len, DEFAULT_SEED); add_hash(hash); } // Generate multiple hash value according to following rule: // new_hash_value = hash_high_part + (i * hash_low_part) void add_hash(uint64_t hash) { - uint32_t hash1 = (uint32_t) hash; - uint32_t hash2 = (uint32_t) (hash >> 32); + uint32_t hash1 = (uint32_t)hash; + uint32_t hash2 = (uint32_t)(hash >> 32); for (uint32_t i = 0; i < _hash_function_num; ++i) { uint64_t combine_hash = hash1 + hash2 * i; @@ -184,15 +169,15 @@ public: // Compute hash value of given buffer and verify whether exist in BloomFilter bool test_bytes(const char* buf, uint32_t len) const { - uint64_t hash = buf == nullptr ? - BLOOM_FILTER_NULL_HASHCODE : HashUtil::hash64(buf, len, DEFAULT_SEED); + uint64_t hash = buf == nullptr ? BLOOM_FILTER_NULL_HASHCODE + : HashUtil::hash64(buf, len, DEFAULT_SEED); return test_hash(hash); } // Verify whether hash value in BloomFilter bool test_hash(uint64_t hash) const { - uint32_t hash1 = (uint32_t) hash; - uint32_t hash2 = (uint32_t) (hash >> 32); + uint32_t hash1 = (uint32_t)hash; + uint32_t hash2 = (uint32_t)(hash >> 32); for (uint32_t i = 0; i < _hash_function_num; ++i) { uint64_t combine_hash = hash1 + hash2 * i; @@ -208,8 +193,7 @@ public: // Merge with another BloomFilter, return false when the length // and hash function number is not equal bool merge(const BloomFilter& that) { - if (_bit_num == that.bit_num() - && _hash_function_num == that.hash_function_num()) { + if (_bit_num == that.bit_num() && _hash_function_num == that.hash_function_num()) { _bit_set.merge(that.bit_set()); return true; } @@ -217,9 +201,7 @@ public: return false; } - void clear() { - _bit_set.clear(); - } + void clear() { _bit_set.clear(); } void reset() { _bit_num = 0; @@ -227,45 +209,34 @@ public: _bit_set.reset(); } - uint32_t bit_num() const { - return _bit_num; - } + uint32_t bit_num() const { return _bit_num; } - uint32_t hash_function_num() const { - return _hash_function_num; - } + uint32_t hash_function_num() const { return _hash_function_num; } - const BitSet& bit_set() const { - return _bit_set; - } + const BitSet& bit_set() const { return _bit_set; } - uint64_t* bit_set_data() const { - return _bit_set.data(); - } + uint64_t* bit_set_data() const { return _bit_set.data(); } - uint32_t bit_set_data_len() const { - return _bit_set.data_len(); - } + uint32_t bit_set_data_len() const { return _bit_set.data_len(); } // Convert BloomFilter to string to convenient debug and test std::string to_string() const { std::stringstream bf_stream; - bf_stream << "bit_num:" << _bit_num - << " hash_function_num:" << _hash_function_num + bf_stream << "bit_num:" << _bit_num << " hash_function_num:" << _hash_function_num << " bit_set:" << _bit_set.to_string(); return bf_stream.str(); } // Get points which set by given buffer in the BitSet std::string get_bytes_points_string(const char* buf, uint32_t len) const { - uint64_t hash = buf == nullptr ? - BLOOM_FILTER_NULL_HASHCODE : HashUtil::hash64(buf, len, DEFAULT_SEED); - uint32_t hash1 = (uint32_t) hash; - uint32_t hash2 = (uint32_t) (hash >> 32); + uint64_t hash = buf == nullptr ? BLOOM_FILTER_NULL_HASHCODE + : HashUtil::hash64(buf, len, DEFAULT_SEED); + uint32_t hash1 = (uint32_t)hash; + uint32_t hash2 = (uint32_t)(hash >> 32); std::stringstream stream; for (uint32_t i = 0; i < _hash_function_num; ++i) { - if (i !=0 ) { + if (i != 0) { stream << "-"; } @@ -281,13 +252,13 @@ private: // Compute the optimal bit number according to the following rule: // m = -n * ln(fpp) / (ln(2) ^ 2) uint32_t _optimal_bit_num(int64_t n, double fpp) { - return (uint32_t) (-n * log(fpp) / (log(2) * log(2))); + return (uint32_t)(-n * log(fpp) / (log(2) * log(2))); } // Compute the optimal hash function number according to the following rule: // k = round(m * ln(2) / n) uint32_t _optimal_hash_function_num(int64_t n, uint32_t m) { - uint32_t k = (uint32_t) round(m * log(2) / n); + uint32_t k = (uint32_t)round(m * log(2) / n); return k > 1 ? k : 1; } @@ -296,6 +267,6 @@ private: uint32_t _hash_function_num; }; -} // namespace doris +} // namespace doris #endif // DORIS_BE_SRC_OLAP_COLUMN_FILE_BLOOM_FILTER_HPP diff --git a/be/src/olap/bloom_filter_predicate.cpp b/be/src/olap/bloom_filter_predicate.cpp index 834df71989..b5bc370eb8 100644 --- a/be/src/olap/bloom_filter_predicate.cpp +++ b/be/src/olap/bloom_filter_predicate.cpp @@ -39,11 +39,11 @@ ColumnPredicate* BloomFilterColumnPredicateFactory::create_column_predicate( FieldType type) { std::shared_ptr filter; switch (type) { -#define M(NAME) \ - case OLAP_FIELD_##NAME: { \ - filter.reset(create_bloom_filter(NAME)); \ - filter->light_copy(bloom_filter.get()); \ - return new BloomFilterColumnPredicate(column_id, filter); \ +#define M(NAME) \ + case OLAP_FIELD_##NAME: { \ + filter.reset(create_bloom_filter(NAME)); \ + filter->light_copy(bloom_filter.get()); \ + return new BloomFilterColumnPredicate(column_id, filter); \ } APPLY_FOR_PRIMTYPE(M) #undef M diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 1605248229..6a1c1d0856 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -86,7 +86,7 @@ void BloomFilterColumnPredicate::evaluate(VectorizedRowBatch* batch) const { template void BloomFilterColumnPredicate::evaluate(ColumnBlock* block, uint16_t* sel, - uint16_t* size) const { + uint16_t* size) const { uint16_t new_size = 0; if (block->is_nullable()) { for (uint16_t i = 0; i < *size; ++i) { @@ -109,7 +109,7 @@ void BloomFilterColumnPredicate::evaluate(ColumnBlock* block, uint16_t* sel, template void BloomFilterColumnPredicate::evaluate(vectorized::IColumn& column, uint16_t* sel, - uint16_t* size) const { + uint16_t* size) const { uint16_t new_size = 0; using FT = typename PredicatePrimitiveTypeTraits::PredicateFieldType; @@ -118,16 +118,18 @@ void BloomFilterColumnPredicate::evaluate(vectorized::IColumn& column, uint16 auto& null_map_data = nullable_col->get_null_map_column().get_data(); // deal ColumnDict if (nullable_col->get_nested_column().is_column_dictionary()) { - auto* dict_col = vectorized::check_and_get_column(nullable_col->get_nested_column()); + auto* dict_col = vectorized::check_and_get_column( + nullable_col->get_nested_column()); const_cast(dict_col)->generate_hash_values(); for (uint16_t i = 0; i < *size; i++) { uint16_t idx = sel[i]; sel[new_size] = idx; - new_size += (!null_map_data[idx]) && _specific_filter->find_uint32_t(dict_col->get_hash_value(idx)); + new_size += (!null_map_data[idx]) && + _specific_filter->find_uint32_t(dict_col->get_hash_value(idx)); } } else { auto* pred_col = vectorized::check_and_get_column>( - nullable_col->get_nested_column()); + nullable_col->get_nested_column()); auto& pred_col_data = pred_col->get_data(); for (uint16_t i = 0; i < *size; i++) { uint16_t idx = sel[i]; @@ -166,4 +168,3 @@ public: }; } //namespace doris - diff --git a/be/src/olap/bloom_filter_reader.cpp b/be/src/olap/bloom_filter_reader.cpp index b98bac0352..74fbf36e08 100644 --- a/be/src/olap/bloom_filter_reader.cpp +++ b/be/src/olap/bloom_filter_reader.cpp @@ -28,7 +28,7 @@ BloomFilterIndexReader::~BloomFilterIndexReader() { } Status BloomFilterIndexReader::init(char* buffer, size_t buffer_size, bool is_using_cache, - uint32_t hash_function_num, uint32_t bit_num) { + uint32_t hash_function_num, uint32_t bit_num) { Status res = Status::OK(); _buffer = buffer; diff --git a/be/src/olap/bloom_filter_reader.h b/be/src/olap/bloom_filter_reader.h index 4098140b87..06408a0ada 100644 --- a/be/src/olap/bloom_filter_reader.h +++ b/be/src/olap/bloom_filter_reader.h @@ -34,8 +34,8 @@ public: ~BloomFilterIndexReader(); // Init BloomFilterIndexReader with given bloom filter index buffer - Status init(char* buffer, size_t buffer_size, bool is_using_cache, - uint32_t hash_function_num, uint32_t bit_num); + Status init(char* buffer, size_t buffer_size, bool is_using_cache, uint32_t hash_function_num, + uint32_t bit_num); // Get specified bloom filter entry const BloomFilter& entry(uint64_t entry_id); @@ -64,4 +64,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/bloom_filter_writer.h b/be/src/olap/bloom_filter_writer.h index f07a505542..b2c8224c3f 100644 --- a/be/src/olap/bloom_filter_writer.h +++ b/be/src/olap/bloom_filter_writer.h @@ -40,4 +40,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/byte_buffer.h b/be/src/olap/byte_buffer.h index 9055eb305e..b43e7fcccc 100644 --- a/be/src/olap/byte_buffer.h +++ b/be/src/olap/byte_buffer.h @@ -208,4 +208,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp index d18cd45b15..e8ac0ffab2 100644 --- a/be/src/olap/collect_iterator.cpp +++ b/be/src/olap/collect_iterator.cpp @@ -87,20 +87,23 @@ void CollectIterator::build_heap(const std::vector& rs_re } ++i; } - Level1Iterator* cumu_iter = - new Level1Iterator(cumu_children, cumu_children.size() > 1, _reverse, - _reader->_sequence_col_idx, &_reader->_merged_rows, sort_type, sort_col_num); + Level1Iterator* cumu_iter = new Level1Iterator( + cumu_children, cumu_children.size() > 1, _reverse, _reader->_sequence_col_idx, + &_reader->_merged_rows, sort_type, sort_col_num); cumu_iter->init(); - _inner_iter.reset(new Level1Iterator(std::list{*base_reader_child, cumu_iter}, _merge, - _reverse, _reader->_sequence_col_idx, &_reader->_merged_rows, sort_type, sort_col_num)); + _inner_iter.reset(new Level1Iterator( + std::list {*base_reader_child, cumu_iter}, _merge, _reverse, + _reader->_sequence_col_idx, &_reader->_merged_rows, sort_type, sort_col_num)); } else { // _children.size() == 1 - _inner_iter.reset(new Level1Iterator(_children, _merge, - _reverse, _reader->_sequence_col_idx, &_reader->_merged_rows, sort_type, sort_col_num)); + _inner_iter.reset(new Level1Iterator(_children, _merge, _reverse, + _reader->_sequence_col_idx, &_reader->_merged_rows, + sort_type, sort_col_num)); } } else { - _inner_iter.reset(new Level1Iterator(_children, _merge, - _reverse, _reader->_sequence_col_idx, &_reader->_merged_rows, sort_type, sort_col_num)); + _inner_iter.reset(new Level1Iterator(_children, _merge, _reverse, + _reader->_sequence_col_idx, &_reader->_merged_rows, + sort_type, sort_col_num)); } _inner_iter->init(); // Clear _children earlier to release any related references @@ -122,12 +125,13 @@ bool CollectIterator::LevelIteratorComparator::operator()(const LevelIterator* a if (_sequence_id_idx != -1) { auto seq_first_cell = first->cell(_sequence_id_idx); auto seq_second_cell = second->cell(_sequence_id_idx); - auto res = first->schema()->column(_sequence_id_idx)->compare_cell(seq_first_cell, seq_second_cell); + auto res = first->schema() + ->column(_sequence_id_idx) + ->compare_cell(seq_first_cell, seq_second_cell); if (res != 0) { res < 0 ? a->set_need_skip(true) : b->set_need_skip(true); return res < 0; } - } // if row cursors equal, compare data version. // read data from higher version to lower version. @@ -141,8 +145,7 @@ bool CollectIterator::LevelIteratorComparator::operator()(const LevelIterator* a return a->version() > b->version(); } -CollectIterator::BaseComparator::BaseComparator( - std::shared_ptr& cmp) { +CollectIterator::BaseComparator::BaseComparator(std::shared_ptr& cmp) { _cmp = cmp; } @@ -151,7 +154,7 @@ bool CollectIterator::BaseComparator::operator()(const LevelIterator* a, const L } bool CollectIterator::LevelZorderIteratorComparator::operator()(const LevelIterator* a, - const LevelIterator* b) { + const LevelIterator* b) { // First compare row cursor. const RowCursor* first = a->current_row(); const RowCursor* second = b->current_row(); @@ -184,7 +187,8 @@ Status CollectIterator::next(const RowCursor** row, bool* delete_flag) { } } -CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader, TabletReader* reader) +CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader, + TabletReader* reader) : _rs_reader(rs_reader), _is_delete(rs_reader->delete_flag()), _reader(reader) { if (LIKELY(rs_reader->type() == RowsetTypePB::BETA_ROWSET)) { _refresh_current_row = &Level0Iterator::_refresh_current_row_v2; @@ -270,11 +274,15 @@ Status CollectIterator::Level0Iterator::next(const RowCursor** row, bool* delete } CollectIterator::Level1Iterator::Level1Iterator( - const std::list& children, - bool merge, bool reverse, int sequence_id_idx, uint64_t* merge_count, - SortType sort_type, int sort_col_num) - : _children(children), _merge(merge), _reverse(reverse), _sequence_id_idx(sequence_id_idx), - _merged_rows(merge_count), _sort_type(sort_type), _sort_col_num(sort_col_num) {} + const std::list& children, bool merge, bool reverse, + int sequence_id_idx, uint64_t* merge_count, SortType sort_type, int sort_col_num) + : _children(children), + _merge(merge), + _reverse(reverse), + _sequence_id_idx(sequence_id_idx), + _merged_rows(merge_count), + _sort_type(sort_type), + _sort_col_num(sort_col_num) {} CollectIterator::LevelIterator::~LevelIterator() = default; @@ -345,7 +353,8 @@ Status CollectIterator::Level1Iterator::init() { if (_merge && _children.size() > 1) { std::shared_ptr cmp; if (_sort_type == SortType::ZORDER) { - cmp = std::make_shared(_reverse, _sequence_id_idx, _sort_col_num); + cmp = std::make_shared(_reverse, _sequence_id_idx, + _sort_col_num); } else { cmp = std::make_shared(_reverse, _sequence_id_idx); } @@ -368,7 +377,7 @@ Status CollectIterator::Level1Iterator::init() { } inline Status CollectIterator::Level1Iterator::_merge_next(const RowCursor** row, - bool* delete_flag) { + bool* delete_flag) { _heap->pop(); auto res = _cur_child->next(row, delete_flag); if (LIKELY(res.ok())) { @@ -399,7 +408,7 @@ inline Status CollectIterator::Level1Iterator::_merge_next(const RowCursor** row } inline Status CollectIterator::Level1Iterator::_normal_next(const RowCursor** row, - bool* delete_flag) { + bool* delete_flag) { auto res = _cur_child->next(row, delete_flag); if (LIKELY(res.ok())) { return Status::OK(); diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 0e7f52abf1..7ebb4604f0 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -91,4 +91,3 @@ protected: }; } //namespace doris - diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 852ec5cae7..156977a9ba 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -87,7 +87,7 @@ Status Compaction::do_compaction_impl(int64_t permits) { // The test results show that merger is low-memory-footprint, there is no need to tracker its mem pool Merger::Statistics stats; auto res = Merger::vmerge_rowsets(_tablet, compaction_type(), _input_rs_readers, - _output_rs_writer.get(), &stats); + _output_rs_writer.get(), &stats); if (!res.ok()) { LOG(WARNING) << "fail to do " << compaction_name() << ". res=" << res << ", tablet=" << _tablet->full_name() @@ -193,7 +193,7 @@ void Compaction::gc_output_rowset() { // Two versions before and after the missing version will be saved in missing_version, // if missing_version is not null. Status Compaction::find_longest_consecutive_version(std::vector* rowsets, - std::vector* missing_version) { + std::vector* missing_version) { if (rowsets->empty()) { return Status::OK(); } diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h index 1f15e8df89..240821b25f 100644 --- a/be/src/olap/compaction.h +++ b/be/src/olap/compaction.h @@ -70,7 +70,7 @@ protected: Status check_version_continuity(const std::vector& rowsets); Status check_correctness(const Merger::Statistics& stats); Status find_longest_consecutive_version(std::vector* rowsets, - std::vector* missing_version); + std::vector* missing_version); int64_t get_compaction_permits(); private: @@ -101,4 +101,3 @@ protected: }; } // namespace doris - diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index 45a89f92ad..d76a2bb506 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -28,8 +28,8 @@ namespace doris { -#define COMPARISON_PRED_CONSTRUCTOR(CLASS) \ - template \ +#define COMPARISON_PRED_CONSTRUCTOR(CLASS) \ + template \ CLASS::CLASS(uint32_t column_id, const T& value, bool opposite) \ : ColumnPredicate(column_id, opposite), _value(value) {} @@ -55,55 +55,54 @@ COMPARISON_PRED_CONSTRUCTOR_STRING(LessEqualPredicate) COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterPredicate) COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterEqualPredicate) -#define COMPARISON_PRED_EVALUATE(CLASS, OP) \ - template \ - void CLASS::evaluate(VectorizedRowBatch* batch) const { \ - uint16_t n = batch->size(); \ - if (n == 0) { \ - return; \ - } \ - uint16_t* sel = batch->selected(); \ - const T* col_vector = \ - reinterpret_cast(batch->column(_column_id)->col_data()); \ - uint16_t new_size = 0; \ - if (batch->column(_column_id)->no_nulls()) { \ - if (batch->selected_in_use()) { \ - for (uint16_t j = 0; j != n; ++j) { \ - uint16_t i = sel[j]; \ - sel[new_size] = i; \ - new_size += (col_vector[i] OP _value); \ - } \ - batch->set_size(new_size); \ - } else { \ - for (uint16_t i = 0; i != n; ++i) { \ - sel[new_size] = i; \ - new_size += (col_vector[i] OP _value); \ - } \ - if (new_size < n) { \ - batch->set_size(new_size); \ - batch->set_selected_in_use(true); \ - } \ - } \ - } else { \ - bool* is_null = batch->column(_column_id)->is_null(); \ - if (batch->selected_in_use()) { \ - for (uint16_t j = 0; j != n; ++j) { \ - uint16_t i = sel[j]; \ - sel[new_size] = i; \ - new_size += (!is_null[i] && (col_vector[i] OP _value)); \ - } \ - batch->set_size(new_size); \ - } else { \ - for (uint16_t i = 0; i != n; ++i) { \ - sel[new_size] = i; \ - new_size += (!is_null[i] && (col_vector[i] OP _value)); \ - } \ - if (new_size < n) { \ - batch->set_size(new_size); \ - batch->set_selected_in_use(true); \ - } \ - } \ - } \ +#define COMPARISON_PRED_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(VectorizedRowBatch* batch) const { \ + uint16_t n = batch->size(); \ + if (n == 0) { \ + return; \ + } \ + uint16_t* sel = batch->selected(); \ + const T* col_vector = reinterpret_cast(batch->column(_column_id)->col_data()); \ + uint16_t new_size = 0; \ + if (batch->column(_column_id)->no_nulls()) { \ + if (batch->selected_in_use()) { \ + for (uint16_t j = 0; j != n; ++j) { \ + uint16_t i = sel[j]; \ + sel[new_size] = i; \ + new_size += (col_vector[i] OP _value); \ + } \ + batch->set_size(new_size); \ + } else { \ + for (uint16_t i = 0; i != n; ++i) { \ + sel[new_size] = i; \ + new_size += (col_vector[i] OP _value); \ + } \ + if (new_size < n) { \ + batch->set_size(new_size); \ + batch->set_selected_in_use(true); \ + } \ + } \ + } else { \ + bool* is_null = batch->column(_column_id)->is_null(); \ + if (batch->selected_in_use()) { \ + for (uint16_t j = 0; j != n; ++j) { \ + uint16_t i = sel[j]; \ + sel[new_size] = i; \ + new_size += (!is_null[i] && (col_vector[i] OP _value)); \ + } \ + batch->set_size(new_size); \ + } else { \ + for (uint16_t i = 0; i != n; ++i) { \ + sel[new_size] = i; \ + new_size += (!is_null[i] && (col_vector[i] OP _value)); \ + } \ + if (new_size < n) { \ + batch->set_size(new_size); \ + batch->set_selected_in_use(true); \ + } \ + } \ + } \ } COMPARISON_PRED_EVALUATE(EqualPredicate, ==) @@ -113,30 +112,28 @@ COMPARISON_PRED_EVALUATE(LessEqualPredicate, <=) COMPARISON_PRED_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=) -#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ - template \ - void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ - uint16_t new_size = 0; \ - if (block->is_nullable()) { \ - for (uint16_t i = 0; i < *size; ++i) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (!block->cell(idx).is_null() && (*cell_value OP _value)); \ - new_size += _opposite ? !result : result; \ - } \ - } else { \ - for (uint16_t i = 0; i < *size; ++i) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (*cell_value OP _value); \ - new_size += _opposite ? !result : result; \ - } \ - } \ - *size = new_size; \ +#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ + uint16_t new_size = 0; \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (!block->cell(idx).is_null() && (*cell_value OP _value)); \ + new_size += _opposite ? !result : result; \ + } \ + } else { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (*cell_value OP _value); \ + new_size += _opposite ? !result : result; \ + } \ + } \ + *size = new_size; \ } COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(EqualPredicate, ==) @@ -190,7 +187,8 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) } else if (column.is_column_dictionary()) { \ if constexpr (std::is_same_v) { \ auto& dict_col = \ - reinterpret_cast&>(column);\ + reinterpret_cast&>( \ + column); \ auto& data_array = dict_col.get_data(); \ auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ : dict_col.find_code(_value); \ @@ -198,13 +196,12 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - bool ret = cell_value OP dict_code; \ + bool ret = cell_value OP dict_code; \ new_size += _opposite ? !ret : ret; \ } \ } \ } else { \ - auto& pred_column_ref = \ - reinterpret_cast&>(column); \ + auto& pred_column_ref = reinterpret_cast&>(column); \ auto& data_array = pred_column_ref.get_data(); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ @@ -217,7 +214,6 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) *size = new_size; \ } - COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false) COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false) COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true) @@ -225,35 +221,34 @@ COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true) COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true) COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true) -#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \ - template \ - void CLASS::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) \ - const { \ - if (column.is_nullable()) { \ - auto* nullable_column = \ - vectorized::check_and_get_column(column); \ - auto& data_array = reinterpret_cast&>( \ - nullable_column->get_nested_column()) \ - .get_data(); \ - auto& null_bitmap = reinterpret_cast&>( \ - *(nullable_column->get_null_map_column_ptr())) \ - .get_data(); \ - for (uint16_t i = 0; i < size; i++) { \ - flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \ - } \ - } else { \ - auto& predicate_column = \ - reinterpret_cast&>(column); \ - auto& data_array = predicate_column.get_data(); \ - for (uint16_t i = 0; i < size; i++) { \ - flags[i] = data_array[i] OP _value; \ - } \ - } \ - if (_opposite) { \ - for (uint16_t i = 0; i < size; i++) { \ - flags[i] = !flags[i]; \ - } \ - } \ +#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \ + template \ + void CLASS::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \ + if (column.is_nullable()) { \ + auto* nullable_column = \ + vectorized::check_and_get_column(column); \ + auto& data_array = reinterpret_cast&>( \ + nullable_column->get_nested_column()) \ + .get_data(); \ + auto& null_bitmap = reinterpret_cast&>( \ + *(nullable_column->get_null_map_column_ptr())) \ + .get_data(); \ + for (uint16_t i = 0; i < size; i++) { \ + flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \ + } \ + } else { \ + auto& predicate_column = \ + reinterpret_cast&>(column); \ + auto& data_array = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; i++) { \ + flags[i] = data_array[i] OP _value; \ + } \ + } \ + if (_opposite) { \ + for (uint16_t i = 0; i < size; i++) { \ + flags[i] = !flags[i]; \ + } \ + } \ } COMPARISON_PRED_COLUMN_EVALUATE_VEC(EqualPredicate, ==) @@ -263,29 +258,27 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC(LessEqualPredicate, <=) COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterPredicate, >) COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterEqualPredicate, >=) -#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ - template \ - void CLASS::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ - const { \ - if (block->is_nullable()) { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (!block->cell(idx).is_null() && (*cell_value OP _value)); \ - flags[i] |= _opposite ? !result : result; \ - } \ - } else { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (*cell_value OP _value); \ - flags[i] |= _opposite ? !result : result; \ - } \ - } \ +#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ + template \ + void CLASS::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ + const { \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (!block->cell(idx).is_null() && (*cell_value OP _value)); \ + flags[i] |= _opposite ? !result : result; \ + } \ + } else { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (*cell_value OP _value); \ + flags[i] |= _opposite ? !result : result; \ + } \ + } \ } COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(EqualPredicate, ==) @@ -295,31 +288,38 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(LessEqualPredicate, <=) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=) -#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \ - template \ - void CLASS::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \ - if (column.is_nullable()) { \ - auto* nullable_column = vectorized::check_and_get_column(column); \ - auto& data_array = reinterpret_cast&>(nullable_column->get_nested_column()).get_data(); \ - auto& null_bitmap = reinterpret_cast&>(*(nullable_column->get_null_map_column_ptr())).get_data();\ - for (uint16_t i = 0; i < size; i++) { \ - if (flags[i]) continue; \ - uint16_t idx = sel[i]; \ - bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \ - flags[i] |= _opposite ? !ret : ret; \ - } \ - } else { \ - auto& predicate_column = reinterpret_cast&>(column); \ - auto& data_array = predicate_column.get_data(); \ - for (uint16_t i = 0; i < size; ++i) { \ - if (flags[i]) continue; \ - uint16_t idx = sel[i]; \ - bool ret = data_array[idx] OP _value; \ - flags[i] |= _opposite ? !ret : ret; \ - } \ - } \ +#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \ + template \ + void CLASS::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ + bool* flags) const { \ + if (column.is_nullable()) { \ + auto* nullable_column = \ + vectorized::check_and_get_column(column); \ + auto& data_array = reinterpret_cast&>( \ + nullable_column->get_nested_column()) \ + .get_data(); \ + auto& null_bitmap = reinterpret_cast&>( \ + *(nullable_column->get_null_map_column_ptr())) \ + .get_data(); \ + for (uint16_t i = 0; i < size; i++) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \ + flags[i] |= _opposite ? !ret : ret; \ + } \ + } else { \ + auto& predicate_column = \ + reinterpret_cast&>(column); \ + auto& data_array = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; ++i) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = data_array[idx] OP _value; \ + flags[i] |= _opposite ? !ret : ret; \ + } \ + } \ } - + COMPARISON_PRED_COLUMN_EVALUATE_OR(EqualPredicate, ==) COMPARISON_PRED_COLUMN_EVALUATE_OR(NotEqualPredicate, !=) COMPARISON_PRED_COLUMN_EVALUATE_OR(LessPredicate, <) @@ -327,29 +327,27 @@ COMPARISON_PRED_COLUMN_EVALUATE_OR(LessEqualPredicate, <=) COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterPredicate, >) COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterEqualPredicate, >=) -#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \ - template \ - void CLASS::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ - const { \ - if (block->is_nullable()) { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (!flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (!block->cell(idx).is_null() && (*cell_value OP _value)); \ - flags[i] &= _opposite ? !result : result; \ - } \ - } else { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (!flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (*cell_value OP _value); \ - flags[i] &= _opposite ? !result : result; \ - } \ - } \ +#define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \ + template \ + void CLASS::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ + const { \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (!block->cell(idx).is_null() && (*cell_value OP _value)); \ + flags[i] &= _opposite ? !result : result; \ + } \ + } else { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (*cell_value OP _value); \ + flags[i] &= _opposite ? !result : result; \ + } \ + } \ } COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(EqualPredicate, ==) @@ -359,31 +357,38 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(LessEqualPredicate, <=) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=) -#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \ - template \ - void CLASS::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \ - if (column.is_nullable()) { \ - auto* nullable_column = vectorized::check_and_get_column(column); \ - auto& data_array = reinterpret_cast&>(nullable_column->get_nested_column()).get_data(); \ - auto& null_bitmap = reinterpret_cast&>(*(nullable_column->get_null_map_column_ptr())).get_data();\ - for (uint16_t i = 0; i < size; i++) { \ - if (!flags[i]) continue; \ - uint16_t idx = sel[i]; \ - bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \ - flags[i] &= _opposite ? !ret : ret; \ - } \ - } else { \ - auto& predicate_column = reinterpret_cast&>(column); \ - auto& data_array = predicate_column.get_data(); \ - for (uint16_t i = 0; i < size; ++i) { \ - if (!flags[i]) continue; \ - uint16_t idx = sel[i]; \ - bool ret = data_array[idx] OP _value; \ - flags[i] &= _opposite ? !ret : ret; \ - } \ - } \ +#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \ + template \ + void CLASS::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ + bool* flags) const { \ + if (column.is_nullable()) { \ + auto* nullable_column = \ + vectorized::check_and_get_column(column); \ + auto& data_array = reinterpret_cast&>( \ + nullable_column->get_nested_column()) \ + .get_data(); \ + auto& null_bitmap = reinterpret_cast&>( \ + *(nullable_column->get_null_map_column_ptr())) \ + .get_data(); \ + for (uint16_t i = 0; i < size; i++) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \ + flags[i] &= _opposite ? !ret : ret; \ + } \ + } else { \ + auto& predicate_column = \ + reinterpret_cast&>(column); \ + auto& data_array = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; ++i) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = data_array[idx] OP _value; \ + flags[i] &= _opposite ? !ret : ret; \ + } \ + } \ } - + COMPARISON_PRED_COLUMN_EVALUATE_AND(EqualPredicate, ==) COMPARISON_PRED_COLUMN_EVALUATE_AND(NotEqualPredicate, !=) COMPARISON_PRED_COLUMN_EVALUATE_AND(LessPredicate, <) @@ -476,8 +481,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_AND(GreaterEqualPredicate, >=) #define COMPARISON_PRED_BITMAP_EVALUATE(CLASS, OP) \ template \ Status CLASS::evaluate(const Schema& schema, \ - const std::vector& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const { \ + const std::vector& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const { \ BitmapIndexIterator* iterator = iterators[_column_id]; \ if (iterator == nullptr) { \ return Status::OK(); \ @@ -689,4 +694,4 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate) COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate) COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate) -} //namespace doris +} //namespace doris \ No newline at end of file diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index a2cb966054..54ddd11e50 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -46,6 +46,7 @@ class VectorizedRowBatch; void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ bool* flags) const override; \ void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const override; \ + \ private: \ T _value; \ }; @@ -58,4 +59,3 @@ COMPARISON_PRED_CLASS_DEFINE(GreaterPredicate, GT) COMPARISON_PRED_CLASS_DEFINE(GreaterEqualPredicate, GE) } //namespace doris - diff --git a/be/src/olap/compress.h b/be/src/olap/compress.h index 71f641c8b0..4a3250ee27 100644 --- a/be/src/olap/compress.h +++ b/be/src/olap/compress.h @@ -53,4 +53,3 @@ Status lz4_compress(StorageByteBuffer* in, StorageByteBuffer* out, bool* smaller Status lz4_decompress(StorageByteBuffer* in, StorageByteBuffer* out); } // namespace doris - diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 1886bdc53d..df8dbc50f1 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -43,8 +43,8 @@ Status CumulativeCompaction::prepare_compact() { // 1. calculate cumulative point _tablet->calculate_cumulative_point(); TRACE("calculated cumulative point"); - VLOG_CRITICAL << "after calculate, current cumulative point is " << _tablet->cumulative_layer_point() - << ", tablet=" << _tablet->full_name(); + VLOG_CRITICAL << "after calculate, current cumulative point is " + << _tablet->cumulative_layer_point() << ", tablet=" << _tablet->full_name(); // 2. pick rowsets to compact RETURN_NOT_OK(pick_rowsets_to_compact()); @@ -82,7 +82,7 @@ Status CumulativeCompaction::execute_compact_impl() { _tablet->cumulative_compaction_policy()->update_cumulative_point( _tablet.get(), _input_rowsets, _output_rowset, _last_delete_version); VLOG_CRITICAL << "after cumulative compaction, current cumulative point is " - << _tablet->cumulative_layer_point() << ", tablet=" << _tablet->full_name(); + << _tablet->cumulative_layer_point() << ", tablet=" << _tablet->full_name(); // 6. add metric to cumulative compaction DorisMetrics::instance()->cumulative_compaction_deltas_total->increment(_input_rowsets.size()); diff --git a/be/src/olap/cumulative_compaction.h b/be/src/olap/cumulative_compaction.h index 7eb504c74f..f228e91975 100644 --- a/be/src/olap/cumulative_compaction.h +++ b/be/src/olap/cumulative_compaction.h @@ -42,10 +42,9 @@ protected: ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; } private: - Version _last_delete_version{-1, -1}; + Version _last_delete_version {-1, -1}; DISALLOW_COPY_AND_ASSIGN(CumulativeCompaction); }; } // namespace doris - diff --git a/be/src/olap/cumulative_compaction_policy.cpp b/be/src/olap/cumulative_compaction_policy.cpp index d1a712c274..bc1dee3148 100644 --- a/be/src/olap/cumulative_compaction_policy.cpp +++ b/be/src/olap/cumulative_compaction_policy.cpp @@ -98,13 +98,14 @@ void SizeBasedCumulativeCompactionPolicy::calculate_cumulative_point( break; } - // include one situation: When the segment is not deleted, and is singleton delta, and is NONOVERLAPPING, ret_cumulative_point increase + // include one situation: When the segment is not deleted, and is singleton delta, and is NONOVERLAPPING, ret_cumulative_point increase prev_version = rs->version().second; *ret_cumulative_point = prev_version + 1; } - VLOG_NOTICE << "cumulative compaction size_based policy, calculate cumulative point value = " - << *ret_cumulative_point << ", calc promotion size value = " << promotion_size - << " tablet = " << tablet->full_name(); + VLOG_NOTICE + << "cumulative compaction size_based policy, calculate cumulative point value = " + << *ret_cumulative_point << ", calc promotion size value = " << promotion_size + << " tablet = " << tablet->full_name(); } else if (tablet->tablet_state() == TABLET_NOTREADY) { // tablet under alter process // we choose version next to the base version as cumulative point @@ -156,9 +157,9 @@ void SizeBasedCumulativeCompactionPolicy::update_cumulative_point( } } -void SizeBasedCumulativeCompactionPolicy::calc_cumulative_compaction_score(TabletState state, - const std::vector& all_metas, int64_t current_cumulative_point, - uint32_t* score) { +void SizeBasedCumulativeCompactionPolicy::calc_cumulative_compaction_score( + TabletState state, const std::vector& all_metas, + int64_t current_cumulative_point, uint32_t* score) { bool base_rowset_exist = false; const int64_t point = current_cumulative_point; int64_t promotion_size = 0; @@ -305,10 +306,11 @@ int SizeBasedCumulativeCompactionPolicy::pick_input_rowsets( rs_iter = input_rowsets->erase(rs_iter); } - VLOG_CRITICAL << "cumulative compaction size_based policy, compaction_score = " << *compaction_score - << ", total_size = " << total_size << ", calc promotion size value = " << promotion_size - << ", tablet = " << tablet->full_name() << ", input_rowset size " - << input_rowsets->size(); + VLOG_CRITICAL << "cumulative compaction size_based policy, compaction_score = " + << *compaction_score << ", total_size = " << total_size + << ", calc promotion size value = " << promotion_size + << ", tablet = " << tablet->full_name() << ", input_rowset size " + << input_rowsets->size(); // empty return if (input_rowsets->empty()) { @@ -396,9 +398,9 @@ int NumBasedCumulativeCompactionPolicy::pick_input_rowsets( return transient_size; } -void NumBasedCumulativeCompactionPolicy::calc_cumulative_compaction_score(TabletState state, - const std::vector& all_rowsets, const int64_t current_cumulative_point, - uint32_t* score) { +void NumBasedCumulativeCompactionPolicy::calc_cumulative_compaction_score( + TabletState state, const std::vector& all_rowsets, + const int64_t current_cumulative_point, uint32_t* score) { const int64_t point = current_cumulative_point; for (auto& rs_meta : all_rowsets) { if (rs_meta->start_version() < point) { @@ -465,10 +467,10 @@ void CumulativeCompactionPolicy::pick_candidate_rowsets( int64_t now = UnixSeconds(); for (auto& it : rs_version_map) { // find all rowset version greater than cumulative_point and skip the create time in skip_window_sec - if (it.first.first >= cumulative_point - && ((it.second->creation_time() + skip_window_sec < now) - // this case means a rowset has been compacted before which is not a new published rowset, so it should participate compaction - || (it.first.first != it.first.second))) { + if (it.first.first >= cumulative_point && + ((it.second->creation_time() + skip_window_sec < now) + // this case means a rowset has been compacted before which is not a new published rowset, so it should participate compaction + || (it.first.first != it.first.second))) { candidate_rowsets->push_back(it.second); } } diff --git a/be/src/olap/cumulative_compaction_policy.h b/be/src/olap/cumulative_compaction_policy.h index bfc9cc218d..0f59d52834 100644 --- a/be/src/olap/cumulative_compaction_policy.h +++ b/be/src/olap/cumulative_compaction_policy.h @@ -260,4 +260,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 25c876ac01..31cd143751 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -64,8 +64,8 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_compaction_num, MetricUnit::NOUNIT); static const char* const kTestFilePath = "/.testfile"; DataDir::DataDir(const std::string& path, int64_t capacity_bytes, - TStorageMedium::type storage_medium, - TabletManager* tablet_manager, TxnManager* txn_manager) + TStorageMedium::type storage_medium, TabletManager* tablet_manager, + TxnManager* txn_manager) : _path_desc(path), _capacity_bytes(capacity_bytes), _available_bytes(0), @@ -260,7 +260,8 @@ Status DataDir::get_shard(uint64_t* shard) { } shard_path_stream << _path_desc.filepath << DATA_PREFIX << "/" << next_shard; std::string shard_path = shard_path_stream.str(); - RETURN_WITH_WARN_IF_ERROR(Env::Default()->create_dirs(shard_path), Status::OLAPInternalError(OLAP_ERR_CANNOT_CREATE_DIR), + RETURN_WITH_WARN_IF_ERROR(Env::Default()->create_dirs(shard_path), + Status::OLAPInternalError(OLAP_ERR_CANNOT_CREATE_DIR), "fail to create path. path=" + shard_path); *shard = next_shard; @@ -361,8 +362,9 @@ Status DataDir::_check_incompatible_old_format_tablet() { Status DataDir::load() { LOG(INFO) << "start to load tablets from " << _path_desc.filepath; if (is_remote()) { - RETURN_WITH_WARN_IF_ERROR(StorageBackendMgr::instance()->init(_path_desc.filepath + STORAGE_PARAM_PREFIX), - Status::OLAPInternalError(OLAP_ERR_INIT_FAILED), "DataDir init failed."); + RETURN_WITH_WARN_IF_ERROR( + StorageBackendMgr::instance()->init(_path_desc.filepath + STORAGE_PARAM_PREFIX), + Status::OLAPInternalError(OLAP_ERR_INIT_FAILED), "DataDir init failed."); } // load rowset meta from meta env and create rowset // COMMITTED: add to txn manager @@ -386,8 +388,7 @@ Status DataDir::load() { dir_rowset_metas.push_back(rowset_meta); return true; }; - Status load_rowset_status = - RowsetMetaManager::traverse_rowset_metas(_meta, load_rowset_func); + Status load_rowset_status = RowsetMetaManager::traverse_rowset_metas(_meta, load_rowset_func); if (!load_rowset_status) { LOG(WARNING) << "errors when load rowset meta from meta env, skip this data dir:" @@ -404,9 +405,10 @@ Status DataDir::load() { auto load_tablet_func = [this, &tablet_ids, &failed_tablet_ids]( int64_t tablet_id, int32_t schema_hash, const std::string& value) -> bool { - Status status = _tablet_manager->load_tablet_from_meta( - this, tablet_id, schema_hash, value, false, false, false, false); - if (!status.ok() && status != Status::OLAPInternalError(OLAP_ERR_TABLE_ALREADY_DELETED_ERROR) && + Status status = _tablet_manager->load_tablet_from_meta(this, tablet_id, schema_hash, value, + false, false, false, false); + if (!status.ok() && + status != Status::OLAPInternalError(OLAP_ERR_TABLE_ALREADY_DELETED_ERROR) && status != Status::OLAPInternalError(OLAP_ERR_ENGINE_INSERT_OLD_TABLET)) { // load_tablet_from_meta() may return Status::OLAPInternalError(OLAP_ERR_TABLE_ALREADY_DELETED_ERROR) // which means the tablet status is DELETED @@ -486,7 +488,8 @@ Status DataDir::load() { rowset_meta->tablet_id(), rowset_meta->tablet_schema_hash(), rowset_meta->tablet_uid(), rowset_meta->load_id(), rowset, true); if (!commit_txn_status && - commit_txn_status != Status::OLAPInternalError(OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST)) { + commit_txn_status != + Status::OLAPInternalError(OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST)) { LOG(WARNING) << "failed to add committed rowset: " << rowset_meta->rowset_id() << " to tablet: " << rowset_meta->tablet_id() << " for txn: " << rowset_meta->txn_id(); @@ -711,7 +714,8 @@ bool DataDir::_check_pending_ids(const std::string& id) { Status DataDir::update_capacity() { RETURN_NOT_OK_STATUS_WITH_WARN( - Env::Default()->get_space_info(_path_desc.filepath, &_disk_capacity_bytes, &_available_bytes), + Env::Default()->get_space_info(_path_desc.filepath, &_disk_capacity_bytes, + &_available_bytes), strings::Substitute("get_space_info failed: $0", _path_desc.filepath)); if (_disk_capacity_bytes < 0) { _disk_capacity_bytes = _capacity_bytes; @@ -763,8 +767,10 @@ void DataDir::disks_compaction_num_increment(int64_t delta) { Status DataDir::move_to_trash(const FilePathDesc& segment_path_desc) { Status res = Status::OK(); FilePathDesc storage_root_desc = _path_desc; - if (is_remote() && !StorageBackendMgr::instance()->get_root_path( - segment_path_desc.storage_name, &(storage_root_desc.remote_path)).ok()) { + if (is_remote() && + !StorageBackendMgr::instance() + ->get_root_path(segment_path_desc.storage_name, &(storage_root_desc.remote_path)) + .ok()) { LOG(WARNING) << "get_root_path failed for storage_name: " << segment_path_desc.storage_name; return Status::OLAPInternalError(OLAP_ERR_OTHER_ERROR); } @@ -777,26 +783,29 @@ Status DataDir::move_to_trash(const FilePathDesc& segment_path_desc) { } // 2. generate new file path desc - static std::atomic delete_counter(0); // a global counter to avoid file name duplication. + static std::atomic delete_counter( + 0); // a global counter to avoid file name duplication. // when file_path points to a schema_path, we need to save tablet info in trash_path, // so we add file_path.parent_path().filename() in new_file_path. // other conditions are not considered, for they are nothing serious. FilePathDescStream trash_root_desc_s; - trash_root_desc_s << storage_root_desc << TRASH_PREFIX << "/" << time_str << "." << delete_counter++; + trash_root_desc_s << storage_root_desc << TRASH_PREFIX << "/" << time_str << "." + << delete_counter++; std::stringstream trash_local_file_stream; std::filesystem::path old_local_path(segment_path_desc.filepath); trash_local_file_stream << trash_root_desc_s.path_desc().filepath << "/" - << old_local_path.parent_path().filename().string() // tablet_path - << "/" << old_local_path.filename().string(); // segment_path + << old_local_path.parent_path().filename().string() // tablet_path + << "/" << old_local_path.filename().string(); // segment_path FilePathDesc trash_path_desc(trash_local_file_stream.str()); trash_path_desc.storage_medium = segment_path_desc.storage_medium; if (is_remote()) { std::stringstream trash_remote_file_stream; std::filesystem::path old_remote_path(segment_path_desc.remote_path); - trash_remote_file_stream << trash_root_desc_s.path_desc().remote_path - << "/" << old_remote_path.parent_path().parent_path().filename().string() // tablet_path - << "/" << old_remote_path.parent_path().filename().string() // segment_path - << "/" << old_remote_path.filename().string(); // tablet_uid + trash_remote_file_stream + << trash_root_desc_s.path_desc().remote_path << "/" + << old_remote_path.parent_path().parent_path().filename().string() // tablet_path + << "/" << old_remote_path.parent_path().filename().string() // segment_path + << "/" << old_remote_path.filename().string(); // tablet_uid trash_path_desc.remote_path = trash_remote_file_stream.str(); trash_path_desc.storage_name = segment_path_desc.storage_name; } @@ -813,7 +822,8 @@ Status DataDir::move_to_trash(const FilePathDesc& segment_path_desc) { // 4. move remote file to trash if needed if (is_remote()) { - std::string trash_storage_name_path = trash_root_desc_s.path_desc().filepath + "/" + STORAGE_NAME; + std::string trash_storage_name_path = + trash_root_desc_s.path_desc().filepath + "/" + STORAGE_NAME; Status st = env_util::write_string_to_file( Env::Default(), Slice(segment_path_desc.storage_name), trash_storage_name_path); if (!st.ok()) { @@ -821,20 +831,22 @@ Status DataDir::move_to_trash(const FilePathDesc& segment_path_desc) { << ", error:" << st.to_string(); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } - std::shared_ptr storage_backend = StorageBackendMgr::instance()-> - get_storage_backend(segment_path_desc.storage_name); + std::shared_ptr storage_backend = + StorageBackendMgr::instance()->get_storage_backend(segment_path_desc.storage_name); if (storage_backend == nullptr) { LOG(WARNING) << "storage_backend is invalid: " << segment_path_desc.storage_name; return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } Status status = storage_backend->exist_dir(segment_path_desc.remote_path); if (status.ok()) { - VLOG_NOTICE << "Move remote file to trash. " << segment_path_desc.remote_path - << " -> " << trash_path_desc.remote_path; - Status rename_status = storage_backend->rename_dir(segment_path_desc.remote_path, trash_path_desc.remote_path); + VLOG_NOTICE << "Move remote file to trash. " << segment_path_desc.remote_path << " -> " + << trash_path_desc.remote_path; + Status rename_status = storage_backend->rename_dir(segment_path_desc.remote_path, + trash_path_desc.remote_path); if (!rename_status.ok()) { OLAP_LOG_WARNING("Move remote file to trash failed. [file=%s target='%s']", - segment_path_desc.remote_path.c_str(), trash_path_desc.remote_path.c_str()); + segment_path_desc.remote_path.c_str(), + trash_path_desc.remote_path.c_str()); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } } else if (status.is_not_found()) { @@ -846,7 +858,8 @@ Status DataDir::move_to_trash(const FilePathDesc& segment_path_desc) { } // 5. move file to trash - VLOG_NOTICE << "move file to trash. " << segment_path_desc.filepath << " -> " << trash_local_file; + VLOG_NOTICE << "move file to trash. " << segment_path_desc.filepath << " -> " + << trash_local_file; if (rename(segment_path_desc.filepath.c_str(), trash_local_file.c_str()) < 0) { OLAP_LOG_WARNING("move file to trash failed. [file=%s target='%s' err='%m']", segment_path_desc.filepath.c_str(), trash_local_file.c_str()); diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 2479b35cc4..b4b86ac98c 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -48,9 +48,9 @@ using google::protobuf::RepeatedPtrField; namespace doris { -Status DeleteConditionHandler::generate_delete_predicate( - const TabletSchema& schema, const std::vector& conditions, - DeletePredicatePB* del_pred) { +Status DeleteConditionHandler::generate_delete_predicate(const TabletSchema& schema, + const std::vector& conditions, + DeletePredicatePB* del_pred) { if (conditions.empty()) { LOG(WARNING) << "invalid parameters for store_cond." << " condition_size=" << conditions.size(); @@ -156,7 +156,7 @@ bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column } Status DeleteConditionHandler::check_condition_valid(const TabletSchema& schema, - const TCondition& cond) { + const TCondition& cond) { // Check whether the column exists int32_t field_index = schema.field_index(cond.column_name); if (field_index < 0) { @@ -229,9 +229,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio return true; } -Status DeleteHandler::init(const TabletSchema& schema, - const DelPredicateArray& delete_conditions, int64_t version, - const TabletReader* reader) { +Status DeleteHandler::init(const TabletSchema& schema, const DelPredicateArray& delete_conditions, + int64_t version, const TabletReader* reader) { DCHECK(!_is_inited) << "reinitialize delete handler."; DCHECK(version >= 0) << "invalid parameters. version=" << version; diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index c676e06df5..afd8bae410 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -38,8 +38,8 @@ class DeleteConditionHandler { public: // generated DeletePredicatePB by TCondition Status generate_delete_predicate(const TabletSchema& schema, - const std::vector& conditions, - DeletePredicatePB* del_pred); + const std::vector& conditions, + DeletePredicatePB* del_pred); // construct sub condition from TCondition std::string construct_sub_predicates(const TCondition& condition); @@ -95,7 +95,7 @@ public: // * Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_PARAMETERS): input parameters are not valid // * Status::OLAPInternalError(OLAP_ERR_MALLOC_ERROR): alloc memory failed Status init(const TabletSchema& schema, const DelPredicateArray& delete_conditions, - int64_t version, const doris::TabletReader* = nullptr); + int64_t version, const doris::TabletReader* = nullptr); // Check whether a row should be deleted. // @@ -137,4 +137,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 70eb6a7b8e..efd9d06212 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -310,7 +310,8 @@ Status DeltaWriter::close() { return Status::OK(); } -Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* tablet_vec, bool is_broken) { +Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* tablet_vec, + bool is_broken) { std::lock_guard l(_lock); DCHECK(_is_init) << "delta writer is supposed be to initialized before close_wait() being called"; @@ -328,8 +329,8 @@ Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* LOG(WARNING) << "fail to build rowset"; return Status::OLAPInternalError(OLAP_ERR_MALLOC_ERROR); } - Status res = _storage_engine->txn_manager()->commit_txn( - _req.partition_id, _tablet, _req.txn_id, _req.load_id, _cur_rowset, false); + Status res = _storage_engine->txn_manager()->commit_txn(_req.partition_id, _tablet, _req.txn_id, + _req.load_id, _cur_rowset, false); if (!res && res != Status::OLAPInternalError(OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST)) { LOG(WARNING) << "Failed to commit txn: " << _req.txn_id << " for rowset: " << _cur_rowset->rowset_id(); @@ -347,9 +348,8 @@ Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* _delta_written_success = true; const FlushStatistic& stat = _flush_token->get_stats(); - VLOG_CRITICAL << "close delta writer for tablet: " << _tablet->tablet_id() - << ", load id: " << print_id(_req.load_id) - << ", stats: " << stat; + VLOG_CRITICAL << "close delta writer for tablet: " << _tablet->tablet_id() + << ", load id: " << print_id(_req.load_id) << ", stats: " << stat; return Status::OK(); } diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index aa6fcb33e7..e095fff132 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -125,4 +125,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/field.h b/be/src/olap/field.h index 72a84ace8f..2973bc1fbb 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -78,12 +78,10 @@ public: return (char*)pool->allocate(_type_info->size()); } - virtual char* allocate_zone_map_value(MemPool* pool) const { - return allocate_value(pool); - } + virtual char* allocate_zone_map_value(MemPool* pool) const { return allocate_value(pool); } void agg_update(RowCursorCell* dest, const RowCursorCell& src, - MemPool* mem_pool = nullptr) const { + MemPool* mem_pool = nullptr) const { if (type() == OLAP_FIELD_TYPE_STRING && mem_pool == nullptr && !src.is_null()) { auto dst_slice = reinterpret_cast(dest->mutable_cell_ptr()); auto src_slice = reinterpret_cast(src.cell_ptr()); @@ -235,7 +233,7 @@ public: //convert and copy field from src to desc Status convert_from(char* dest, const char* src, const TypeInfo* src_type, - MemPool* mem_pool) const { + MemPool* mem_pool) const { return _type_info->convert_from(dest, src, src_type, mem_pool, get_variable_len()); } @@ -790,4 +788,3 @@ public: }; } // namespace doris - diff --git a/be/src/olap/file_helper.h b/be/src/olap/file_helper.h index 3dabdc6fff..75d47742d9 100644 --- a/be/src/olap/file_helper.h +++ b/be/src/olap/file_helper.h @@ -237,8 +237,7 @@ private: // FileHandler implementation template -Status FileHeader::prepare( - FileHandlerType* file_handler) { +Status FileHeader::prepare(FileHandlerType* file_handler) { if (nullptr == file_handler) { return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); } @@ -286,7 +285,7 @@ Status FileHeader::serialize( } if (!file_handler->pwrite(&_extra_fixed_header, sizeof(_extra_fixed_header), - _fixed_file_header_size)) { + _fixed_file_header_size)) { char errmsg[64]; LOG(WARNING) << "fail to write extra fixed header to file. [file='" << file_handler->file_name() << "' err=" << strerror_r(errno, errmsg, 64) @@ -295,7 +294,7 @@ Status FileHeader::serialize( } if (!file_handler->pwrite(_proto_string.c_str(), _proto_string.size(), - _fixed_file_header_size + sizeof(_extra_fixed_header))) { + _fixed_file_header_size + sizeof(_extra_fixed_header))) { char errmsg[64]; LOG(WARNING) << "fail to write proto header to file. [file='" << file_handler->file_name() << "' err='" << strerror_r(errno, errmsg, 64) << "']"; @@ -351,7 +350,7 @@ Status FileHeader::unserialize( << ", version=" << _fixed_file_header.version; if (!file_handler->pread(&_extra_fixed_header, sizeof(_extra_fixed_header), - _fixed_file_header_size)) { + _fixed_file_header_size)) { char errmsg[64]; LOG(WARNING) << "fail to load extra fixed header from file. file=" << file_handler->file_name() << ", error=" << strerror_r(errno, errmsg, 64); @@ -368,7 +367,7 @@ Status FileHeader::unserialize( } if (!file_handler->pread(buf.get(), _fixed_file_header.protobuf_length, - _fixed_file_header_size + sizeof(_extra_fixed_header))) { + _fixed_file_header_size + sizeof(_extra_fixed_header))) { char errmsg[64]; LOG(WARNING) << "fail to load protobuf from file. file=" << file_handler->file_name() << ", error=" << strerror_r(errno, errmsg, 64); @@ -412,8 +411,7 @@ Status FileHeader::unserialize( } template -Status FileHeader::validate( - const std::string& filename) { +Status FileHeader::validate(const std::string& filename) { FileHandler file_handler; Status res = Status::OK(); @@ -433,4 +431,3 @@ Status FileHeader::validate( } } // namespace doris - diff --git a/be/src/olap/file_stream.h b/be/src/olap/file_stream.h index ff71d93206..5f3115345e 100644 --- a/be/src/olap/file_stream.h +++ b/be/src/olap/file_stream.h @@ -272,4 +272,3 @@ inline Status ReadOnlyFileStream::read_all(char* buffer, uint64_t* buffer_size) } } // namespace doris - diff --git a/be/src/olap/fs/file_block_manager.cpp b/be/src/olap/fs/file_block_manager.cpp index 72b0c43d37..d66645fb25 100644 --- a/be/src/olap/fs/file_block_manager.cpp +++ b/be/src/olap/fs/file_block_manager.cpp @@ -131,7 +131,8 @@ FileWritableBlock::FileWritableBlock(FileBlockManager* block_manager, const File FileWritableBlock::~FileWritableBlock() { if (_state != CLOSED) { - WARN_IF_ERROR(abort(), strings::Substitute("Failed to close block $0", _path_desc.filepath)); + WARN_IF_ERROR(abort(), + strings::Substitute("Failed to close block $0", _path_desc.filepath)); } } @@ -162,13 +163,15 @@ Status FileWritableBlock::append(const Slice& data) { } Status FileWritableBlock::appendv(const Slice* data, size_t data_cnt) { - DCHECK(_state == CLEAN || _state == DIRTY) << "path=" << _path_desc.filepath << " invalid state=" << _state; + DCHECK(_state == CLEAN || _state == DIRTY) + << "path=" << _path_desc.filepath << " invalid state=" << _state; RETURN_IF_ERROR(_writer->appendv(data, data_cnt)); _state = DIRTY; // Calculate the amount of data written - size_t bytes_written = accumulate(data, data + data_cnt, static_cast(0), - [](size_t sum, const Slice& curr) { return sum + curr.size; }); + size_t bytes_written = + accumulate(data, data + data_cnt, static_cast(0), + [](size_t sum, const Slice& curr) { return sum + curr.size; }); _bytes_appended += bytes_written; return Status::OK(); } @@ -218,7 +221,8 @@ Status FileWritableBlock::_close(SyncMode mode) { if (sync.ok()) { sync = _block_manager->_sync_metadata(_path_desc.filepath); } - WARN_IF_ERROR(sync, strings::Substitute("Failed to sync when closing block $0", _path_desc.filepath)); + WARN_IF_ERROR(sync, strings::Substitute("Failed to sync when closing block $0", + _path_desc.filepath)); } Status close = _writer->close(); @@ -365,8 +369,7 @@ Status FileReadableBlock::readv(uint64_t offset, const Slice* results, size_t re //////////////////////////////////////////////////////////// FileBlockManager::FileBlockManager(Env* env, BlockManagerOptions opts) - : _env(DCHECK_NOTNULL(env)), - _opts(std::move(opts)) { + : _env(DCHECK_NOTNULL(env)), _opts(std::move(opts)) { if (_opts.enable_metric) { _metrics.reset(new internal::BlockManagerMetrics()); } @@ -436,10 +439,12 @@ Status FileBlockManager::delete_block(const FilePathDesc& path_desc, bool is_dir return Status::OK(); } -Status FileBlockManager::link_file(const FilePathDesc& src_path_desc, const FilePathDesc& dest_path_desc) { +Status FileBlockManager::link_file(const FilePathDesc& src_path_desc, + const FilePathDesc& dest_path_desc) { if (link(src_path_desc.filepath.c_str(), dest_path_desc.filepath.c_str()) != 0) { LOG(WARNING) << "fail to create hard link. from=" << src_path_desc.filepath << ", " - << "to=" << dest_path_desc.filepath << ", " << "errno=" << Errno::no(); + << "to=" << dest_path_desc.filepath << ", " + << "errno=" << Errno::no(); return Status::InternalError("link file failed"); } return Status::OK(); diff --git a/be/src/olap/fs/fs_util.cpp b/be/src/olap/fs/fs_util.cpp index dc8fa7ba93..221c561fc4 100644 --- a/be/src/olap/fs/fs_util.cpp +++ b/be/src/olap/fs/fs_util.cpp @@ -35,8 +35,8 @@ BlockManager* block_manager(const FilePathDesc& path_desc) { bm_opts.read_only = false; if (path_desc.is_remote()) { bm_opts.read_only = true; - std::shared_ptr storage_backend = StorageBackendMgr::instance()-> - get_storage_backend(path_desc.storage_name); + std::shared_ptr storage_backend = + StorageBackendMgr::instance()->get_storage_backend(path_desc.storage_name); if (storage_backend == nullptr) { LOG(WARNING) << "storage_backend is invalid: " << path_desc.debug_string(); return nullptr; @@ -51,25 +51,25 @@ BlockManager* block_manager(const FilePathDesc& path_desc) { StorageMediumPB get_storage_medium_pb(TStorageMedium::type t_storage_medium) { switch (t_storage_medium) { - case TStorageMedium::S3: - return StorageMediumPB::S3; - case TStorageMedium::SSD: - return StorageMediumPB::SSD; - case TStorageMedium::HDD: - default: - return StorageMediumPB::HDD; + case TStorageMedium::S3: + return StorageMediumPB::S3; + case TStorageMedium::SSD: + return StorageMediumPB::SSD; + case TStorageMedium::HDD: + default: + return StorageMediumPB::HDD; } } TStorageMedium::type get_t_storage_medium(StorageMediumPB storage_medium) { switch (storage_medium) { - case StorageMediumPB::S3: - return TStorageMedium::S3; - case StorageMediumPB::SSD: - return TStorageMedium::SSD; - case StorageMediumPB::HDD: - default: - return TStorageMedium::HDD; + case StorageMediumPB::S3: + return TStorageMedium::S3; + case StorageMediumPB::SSD: + return TStorageMedium::SSD; + case StorageMediumPB::HDD: + default: + return TStorageMedium::HDD; } } @@ -78,22 +78,22 @@ StorageParamPB get_storage_param_pb(const TStorageParam& t_storage_param) { storage_param.set_storage_medium(get_storage_medium_pb(t_storage_param.storage_medium)); storage_param.set_storage_name(t_storage_param.storage_name); switch (t_storage_param.storage_medium) { - case TStorageMedium::S3: { - S3StorageParamPB* s3_param = storage_param.mutable_s3_storage_param(); - s3_param->set_s3_endpoint(t_storage_param.s3_storage_param.s3_endpoint); - s3_param->set_s3_region(t_storage_param.s3_storage_param.s3_region); - s3_param->set_s3_ak(t_storage_param.s3_storage_param.s3_ak); - s3_param->set_s3_sk(t_storage_param.s3_storage_param.s3_sk); - s3_param->set_s3_max_conn(t_storage_param.s3_storage_param.s3_max_conn); - s3_param->set_s3_request_timeout_ms(t_storage_param.s3_storage_param.s3_request_timeout_ms); - s3_param->set_s3_conn_timeout_ms(t_storage_param.s3_storage_param.s3_conn_timeout_ms); - s3_param->set_root_path(t_storage_param.s3_storage_param.root_path); - return storage_param; - } - case TStorageMedium::SSD: - case TStorageMedium::HDD: - default: - return storage_param; + case TStorageMedium::S3: { + S3StorageParamPB* s3_param = storage_param.mutable_s3_storage_param(); + s3_param->set_s3_endpoint(t_storage_param.s3_storage_param.s3_endpoint); + s3_param->set_s3_region(t_storage_param.s3_storage_param.s3_region); + s3_param->set_s3_ak(t_storage_param.s3_storage_param.s3_ak); + s3_param->set_s3_sk(t_storage_param.s3_storage_param.s3_sk); + s3_param->set_s3_max_conn(t_storage_param.s3_storage_param.s3_max_conn); + s3_param->set_s3_request_timeout_ms(t_storage_param.s3_storage_param.s3_request_timeout_ms); + s3_param->set_s3_conn_timeout_ms(t_storage_param.s3_storage_param.s3_conn_timeout_ms); + s3_param->set_root_path(t_storage_param.s3_storage_param.root_path); + return storage_param; + } + case TStorageMedium::SSD: + case TStorageMedium::HDD: + default: + return storage_param; } } diff --git a/be/src/olap/fs/remote_block_manager.cpp b/be/src/olap/fs/remote_block_manager.cpp index 3da77a4d1e..481f0ac2d6 100644 --- a/be/src/olap/fs/remote_block_manager.cpp +++ b/be/src/olap/fs/remote_block_manager.cpp @@ -111,15 +111,14 @@ private: size_t _bytes_appended; }; -RemoteWritableBlock::RemoteWritableBlock(RemoteBlockManager* block_manager, const FilePathDesc& path_desc, +RemoteWritableBlock::RemoteWritableBlock(RemoteBlockManager* block_manager, + const FilePathDesc& path_desc, shared_ptr local_writer) : _block_manager(block_manager), _path_desc(path_desc), - _local_writer(std::move(local_writer)) { -} + _local_writer(std::move(local_writer)) {} -RemoteWritableBlock::~RemoteWritableBlock() { -} +RemoteWritableBlock::~RemoteWritableBlock() {} Status RemoteWritableBlock::close() { return Status::IOError("invalid function", 0, ""); @@ -223,11 +222,9 @@ private: RemoteReadableBlock::RemoteReadableBlock( RemoteBlockManager* block_manager, const FilePathDesc& path_desc, - std::shared_ptr> file_handle) { -} + std::shared_ptr> file_handle) {} -RemoteReadableBlock::~RemoteReadableBlock() { -} +RemoteReadableBlock::~RemoteReadableBlock() {} Status RemoteReadableBlock::close() { return Status::IOError("invalid function", 0, ""); @@ -264,10 +261,10 @@ Status RemoteReadableBlock::readv(uint64_t offset, const Slice* results, size_t // RemoteBlockManager //////////////////////////////////////////////////////////// -RemoteBlockManager::RemoteBlockManager(Env* local_env, std::shared_ptr storage_backend, +RemoteBlockManager::RemoteBlockManager(Env* local_env, + std::shared_ptr storage_backend, const BlockManagerOptions& opts) - : _local_env(local_env), _storage_backend(storage_backend), _opts(opts) { -} + : _local_env(local_env), _storage_backend(storage_backend), _opts(opts) {} RemoteBlockManager::~RemoteBlockManager() {} @@ -286,8 +283,8 @@ Status RemoteBlockManager::create_block(const CreateBlockOptions& opts, shared_ptr local_writer; WritableFileOptions wr_opts; wr_opts.mode = Env::MUST_CREATE; - RETURN_IF_ERROR(env_util::open_file_for_write( - wr_opts, Env::Default(), opts.path_desc.filepath, &local_writer)); + RETURN_IF_ERROR(env_util::open_file_for_write(wr_opts, Env::Default(), opts.path_desc.filepath, + &local_writer)); VLOG_CRITICAL << "Creating new remote block. local: " << opts.path_desc.filepath << ", remote: " << opts.path_desc.remote_path; @@ -295,9 +292,10 @@ Status RemoteBlockManager::create_block(const CreateBlockOptions& opts, return Status::OK(); } -Status RemoteBlockManager::open_block(const FilePathDesc& path_desc, std::unique_ptr* block) { - VLOG_CRITICAL << "Opening remote block. local: " - << path_desc.filepath << ", remote: " << path_desc.remote_path; +Status RemoteBlockManager::open_block(const FilePathDesc& path_desc, + std::unique_ptr* block) { + VLOG_CRITICAL << "Opening remote block. local: " << path_desc.filepath + << ", remote: " << path_desc.remote_path; std::shared_ptr> file_handle; if (Env::Default()->path_exists(path_desc.filepath).ok()) { file_handle.reset(new OpenedFileHandle()); @@ -332,12 +330,14 @@ Status RemoteBlockManager::delete_block(const FilePathDesc& path_desc, bool is_d return Status::OK(); } -Status RemoteBlockManager::link_file(const FilePathDesc& src_path_desc, const FilePathDesc& dest_path_desc) { +Status RemoteBlockManager::link_file(const FilePathDesc& src_path_desc, + const FilePathDesc& dest_path_desc) { if (_local_env->path_exists(src_path_desc.filepath).ok()) { RETURN_IF_ERROR(_local_env->link_file(src_path_desc.filepath, dest_path_desc.filepath)); } if (_storage_backend->exist(src_path_desc.remote_path).ok()) { - RETURN_IF_ERROR(_storage_backend->copy(src_path_desc.remote_path, dest_path_desc.remote_path)); + RETURN_IF_ERROR( + _storage_backend->copy(src_path_desc.remote_path, dest_path_desc.remote_path)); } return Status::OK(); } diff --git a/be/src/olap/fs/remote_block_manager.h b/be/src/olap/fs/remote_block_manager.h index 9032392626..3a35a4a84c 100644 --- a/be/src/olap/fs/remote_block_manager.h +++ b/be/src/olap/fs/remote_block_manager.h @@ -40,7 +40,8 @@ class RemoteBlockManager : public BlockManager { public: // Note: all objects passed as pointers should remain alive for the lifetime // of the block manager. - RemoteBlockManager(Env* local_env, std::shared_ptr storage_backend, const BlockManagerOptions& opts); + RemoteBlockManager(Env* local_env, std::shared_ptr storage_backend, + const BlockManagerOptions& opts); virtual ~RemoteBlockManager(); Status open() override; diff --git a/be/src/olap/generic_iterators.cpp b/be/src/olap/generic_iterators.cpp index a499f28ac6..47330907d5 100644 --- a/be/src/olap/generic_iterators.cpp +++ b/be/src/olap/generic_iterators.cpp @@ -262,9 +262,8 @@ private: int sequence_id_idx; }; - using MergeHeap = std::priority_queue, - MergeContextComparator>; + using MergeHeap = std::priority_queue, + MergeContextComparator>; MergeHeap _merge_heap; }; @@ -324,7 +323,8 @@ public: UnionIterator(std::vector& v) : _origin_iters(v.begin(), v.end()) {} ~UnionIterator() override { - std::for_each(_origin_iters.begin(), _origin_iters.end(), std::default_delete()); + std::for_each(_origin_iters.begin(), _origin_iters.end(), + std::default_delete()); } Status init(const StorageReadOptions& opts) override; diff --git a/be/src/olap/hll.h b/be/src/olap/hll.h index 7d07c1f266..70365fec9f 100644 --- a/be/src/olap/hll.h +++ b/be/src/olap/hll.h @@ -214,9 +214,9 @@ public: size_t memory_consumed() const { size_t size = sizeof(*this); - if (_type == HLL_DATA_EXPLICIT) + if (_type == HLL_DATA_EXPLICIT) size += _hash_set.size() * sizeof(uint64_t); - else if (_type == HLL_DATA_SPARSE || _type == HLL_DATA_FULL) + else if (_type == HLL_DATA_SPARSE || _type == HLL_DATA_FULL) size += HLL_REGISTERS_COUNT; return size; } @@ -275,7 +275,6 @@ private: uint8_t* _registers = nullptr; private: - void _convert_explicit_to_register(); // update one hash value into this registers @@ -381,4 +380,3 @@ public: }; } // namespace doris - diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp index 9b78a8705f..4133f7d064 100644 --- a/be/src/olap/in_list_predicate.cpp +++ b/be/src/olap/in_list_predicate.cpp @@ -26,258 +26,253 @@ namespace doris { -#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \ - template \ - CLASS::CLASS(uint32_t column_id, phmap::flat_hash_set&& values, bool opposite) \ +#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \ + template \ + CLASS::CLASS(uint32_t column_id, phmap::flat_hash_set&& values, bool opposite) \ : ColumnPredicate(column_id, opposite), _values(std::move(values)) {} IN_LIST_PRED_CONSTRUCTOR(InListPredicate) IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate) -#define IN_LIST_PRED_EVALUATE(CLASS, OP) \ - template \ - void CLASS::evaluate(VectorizedRowBatch* batch) const { \ - uint16_t n = batch->size(); \ - if (n == 0) { \ - return; \ - } \ - uint16_t* sel = batch->selected(); \ - const T* col_vector = \ - reinterpret_cast(batch->column(_column_id)->col_data()); \ - uint16_t new_size = 0; \ - if (batch->column(_column_id)->no_nulls()) { \ - if (batch->selected_in_use()) { \ - for (uint16_t j = 0; j != n; ++j) { \ - uint16_t i = sel[j]; \ - sel[new_size] = i; \ - new_size += (_values.find(col_vector[i]) OP _values.end()); \ - } \ - batch->set_size(new_size); \ - } else { \ - for (uint16_t i = 0; i != n; ++i) { \ - sel[new_size] = i; \ - new_size += (_values.find(col_vector[i]) OP _values.end()); \ - } \ - if (new_size < n) { \ - batch->set_size(new_size); \ - batch->set_selected_in_use(true); \ - } \ - } \ - } else { \ - bool* is_null = batch->column(_column_id)->is_null(); \ - if (batch->selected_in_use()) { \ - for (uint16_t j = 0; j != n; ++j) { \ - uint16_t i = sel[j]; \ - sel[new_size] = i; \ - new_size += (!is_null[i] && _values.find(col_vector[i]) OP _values.end()); \ - } \ - batch->set_size(new_size); \ - } else { \ - for (int i = 0; i != n; ++i) { \ - sel[new_size] = i; \ - new_size += (!is_null[i] && _values.find(col_vector[i]) OP _values.end()); \ - } \ - if (new_size < n) { \ - batch->set_size(new_size); \ - batch->set_selected_in_use(true); \ - } \ - } \ - } \ +#define IN_LIST_PRED_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(VectorizedRowBatch* batch) const { \ + uint16_t n = batch->size(); \ + if (n == 0) { \ + return; \ + } \ + uint16_t* sel = batch->selected(); \ + const T* col_vector = reinterpret_cast(batch->column(_column_id)->col_data()); \ + uint16_t new_size = 0; \ + if (batch->column(_column_id)->no_nulls()) { \ + if (batch->selected_in_use()) { \ + for (uint16_t j = 0; j != n; ++j) { \ + uint16_t i = sel[j]; \ + sel[new_size] = i; \ + new_size += (_values.find(col_vector[i]) OP _values.end()); \ + } \ + batch->set_size(new_size); \ + } else { \ + for (uint16_t i = 0; i != n; ++i) { \ + sel[new_size] = i; \ + new_size += (_values.find(col_vector[i]) OP _values.end()); \ + } \ + if (new_size < n) { \ + batch->set_size(new_size); \ + batch->set_selected_in_use(true); \ + } \ + } \ + } else { \ + bool* is_null = batch->column(_column_id)->is_null(); \ + if (batch->selected_in_use()) { \ + for (uint16_t j = 0; j != n; ++j) { \ + uint16_t i = sel[j]; \ + sel[new_size] = i; \ + new_size += (!is_null[i] && _values.find(col_vector[i]) OP _values.end()); \ + } \ + batch->set_size(new_size); \ + } else { \ + for (int i = 0; i != n; ++i) { \ + sel[new_size] = i; \ + new_size += (!is_null[i] && _values.find(col_vector[i]) OP _values.end()); \ + } \ + if (new_size < n) { \ + batch->set_size(new_size); \ + batch->set_selected_in_use(true); \ + } \ + } \ + } \ } IN_LIST_PRED_EVALUATE(InListPredicate, !=) IN_LIST_PRED_EVALUATE(NotInListPredicate, ==) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ - template \ - void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ - uint16_t new_size = 0; \ - if (block->is_nullable()) { \ - for (uint16_t i = 0; i < *size; ++i) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \ - OP _values.end()); \ - new_size += _opposite ? !result : result; \ - } \ - } else { \ - for (uint16_t i = 0; i < *size; ++i) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (_values.find(*cell_value) OP _values.end()); \ - new_size += _opposite ? !result : result; \ - } \ - } \ - *size = new_size; \ +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ + uint16_t new_size = 0; \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \ + OP _values.end()); \ + new_size += _opposite ? !result : result; \ + } \ + } else { \ + for (uint16_t i = 0; i < *size; ++i) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (_values.find(*cell_value) OP _values.end()); \ + new_size += _opposite ? !result : result; \ + } \ + } \ + *size = new_size; \ } IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) // todo(zeno) define interface in IColumn to simplify code -#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \ - template \ - void CLASS::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ - uint16_t new_size = 0; \ - if (column.is_nullable()) { \ - auto* nullable_col = \ - vectorized::check_and_get_column(column); \ - auto& null_bitmap = reinterpret_cast( \ - nullable_col->get_null_map_column()).get_data(); \ - auto& nested_col = nullable_col->get_nested_column(); \ - if (nested_col.is_column_dictionary()) { \ - if constexpr (std::is_same_v) { \ - auto* nested_col_ptr = vectorized::check_and_get_column< \ - vectorized::ColumnDictionary>(nested_col); \ - auto& data_array = nested_col_ptr->get_data(); \ - auto dict_codes = nested_col_ptr->find_codes(_values); \ - for (uint16_t i = 0; i < *size; i++) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const auto& cell_value = data_array[idx]; \ - bool ret = !null_bitmap[idx] \ - && (dict_codes.find(cell_value) OP dict_codes.end()); \ - new_size += _opposite ? !ret : ret; \ - } \ - } \ - } else { \ - auto* nested_col_ptr = vectorized::check_and_get_column< \ - vectorized::PredicateColumnType>(nested_col); \ - auto& data_array = nested_col_ptr->get_data(); \ - for (uint16_t i = 0; i < *size; i++) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const auto& cell_value = reinterpret_cast(data_array[idx]); \ - bool ret = !null_bitmap[idx] && (_values.find(cell_value) OP _values.end()); \ - new_size += _opposite ? !ret : ret; \ - } \ - } \ - } else if (column.is_column_dictionary()) { \ - if constexpr (std::is_same_v) { \ - auto& dict_col = \ - reinterpret_cast&>( \ - column); \ - auto& data_array = dict_col.get_data(); \ - auto dict_codes = dict_col.find_codes(_values); \ - for (uint16_t i = 0; i < *size; i++) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const auto& cell_value = data_array[idx]; \ - auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \ - new_size += _opposite ? !result : result; \ - } \ - } \ - } else { \ - auto& number_column = reinterpret_cast&>(column); \ - auto& data_array = number_column.get_data(); \ - for (uint16_t i = 0; i < *size; i++) { \ - uint16_t idx = sel[i]; \ - sel[new_size] = idx; \ - const auto& cell_value = reinterpret_cast(data_array[idx]); \ - auto result = (_values.find(cell_value) OP _values.end()); \ - new_size += _opposite ? !result : result; \ - } \ - } \ - *size = new_size; \ +#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \ + template \ + void CLASS::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ + uint16_t new_size = 0; \ + if (column.is_nullable()) { \ + auto* nullable_col = \ + vectorized::check_and_get_column(column); \ + auto& null_bitmap = reinterpret_cast( \ + nullable_col->get_null_map_column()) \ + .get_data(); \ + auto& nested_col = nullable_col->get_nested_column(); \ + if (nested_col.is_column_dictionary()) { \ + if constexpr (std::is_same_v) { \ + auto* nested_col_ptr = vectorized::check_and_get_column< \ + vectorized::ColumnDictionary>(nested_col); \ + auto& data_array = nested_col_ptr->get_data(); \ + auto dict_codes = nested_col_ptr->find_codes(_values); \ + for (uint16_t i = 0; i < *size; i++) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const auto& cell_value = data_array[idx]; \ + bool ret = !null_bitmap[idx] && \ + (dict_codes.find(cell_value) OP dict_codes.end()); \ + new_size += _opposite ? !ret : ret; \ + } \ + } \ + } else { \ + auto* nested_col_ptr = \ + vectorized::check_and_get_column>( \ + nested_col); \ + auto& data_array = nested_col_ptr->get_data(); \ + for (uint16_t i = 0; i < *size; i++) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const auto& cell_value = reinterpret_cast(data_array[idx]); \ + bool ret = !null_bitmap[idx] && (_values.find(cell_value) OP _values.end()); \ + new_size += _opposite ? !ret : ret; \ + } \ + } \ + } else if (column.is_column_dictionary()) { \ + if constexpr (std::is_same_v) { \ + auto& dict_col = \ + reinterpret_cast&>( \ + column); \ + auto& data_array = dict_col.get_data(); \ + auto dict_codes = dict_col.find_codes(_values); \ + for (uint16_t i = 0; i < *size; i++) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const auto& cell_value = data_array[idx]; \ + auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \ + new_size += _opposite ? !result : result; \ + } \ + } \ + } else { \ + auto& number_column = reinterpret_cast&>(column); \ + auto& data_array = number_column.get_data(); \ + for (uint16_t i = 0; i < *size; i++) { \ + uint16_t idx = sel[i]; \ + sel[new_size] = idx; \ + const auto& cell_value = reinterpret_cast(data_array[idx]); \ + auto result = (_values.find(cell_value) OP _values.end()); \ + new_size += _opposite ? !result : result; \ + } \ + } \ + *size = new_size; \ } IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=) IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ - template \ - void CLASS::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ - const { \ - if (block->is_nullable()) { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \ - OP _values.end()); \ - flags[i] |= _opposite ? !result : result; \ - } \ - } else { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (_values.find(*cell_value) OP _values.end()); \ - flags[i] |= _opposite ? !result : result; \ - } \ - } \ +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ + template \ + void CLASS::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ + const { \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \ + OP _values.end()); \ + flags[i] |= _opposite ? !result : result; \ + } \ + } else { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (_values.find(*cell_value) OP _values.end()); \ + flags[i] |= _opposite ? !result : result; \ + } \ + } \ } IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, ==) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \ - template \ - void CLASS::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ - const { \ - if (block->is_nullable()) { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (!flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \ - OP _values.end()); \ - flags[i] &= _opposite ? !result : result; \ - } \ - } else { \ - for (uint16_t i = 0; i < size; ++i) { \ - if (!flags[i]) continue; \ - uint16_t idx = sel[i]; \ - const T* cell_value = \ - reinterpret_cast(block->cell(idx).cell_ptr()); \ - auto result = (_values.find(*cell_value) OP _values.end()); \ - flags[i] &= _opposite ? !result : result; \ - } \ - } \ +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \ + template \ + void CLASS::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ + const { \ + if (block->is_nullable()) { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \ + OP _values.end()); \ + flags[i] &= _opposite ? !result : result; \ + } \ + } else { \ + for (uint16_t i = 0; i < size; ++i) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + const T* cell_value = reinterpret_cast(block->cell(idx).cell_ptr()); \ + auto result = (_values.find(*cell_value) OP _values.end()); \ + flags[i] &= _opposite ? !result : result; \ + } \ + } \ } IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==) -#define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP) \ - template \ - Status CLASS::evaluate(const Schema& schema, \ - const std::vector& iterators, \ - uint32_t num_rows, roaring::Roaring* result) const { \ - BitmapIndexIterator* iterator = iterators[_column_id]; \ - if (iterator == nullptr) { \ - return Status::OK(); \ - } \ - if (iterator->has_null_bitmap()) { \ - roaring::Roaring null_bitmap; \ - RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap)); \ - *result -= null_bitmap; \ - } \ - roaring::Roaring indices; \ - for (auto value : _values) { \ - bool exact_match; \ - Status s = iterator->seek_dictionary(&value, &exact_match); \ - rowid_t seeked_ordinal = iterator->current_ordinal(); \ - if (!s.is_not_found()) { \ - if (!s.ok()) { \ - return s; \ - } \ - if (exact_match) { \ - roaring::Roaring index; \ - RETURN_IF_ERROR(iterator->read_bitmap(seeked_ordinal, &index)); \ - indices |= index; \ - } \ - } \ - } \ - *result OP indices; \ - return Status::OK(); \ +#define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP) \ + template \ + Status CLASS::evaluate(const Schema& schema, \ + const std::vector& iterators, \ + uint32_t num_rows, roaring::Roaring* result) const { \ + BitmapIndexIterator* iterator = iterators[_column_id]; \ + if (iterator == nullptr) { \ + return Status::OK(); \ + } \ + if (iterator->has_null_bitmap()) { \ + roaring::Roaring null_bitmap; \ + RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap)); \ + *result -= null_bitmap; \ + } \ + roaring::Roaring indices; \ + for (auto value : _values) { \ + bool exact_match; \ + Status s = iterator->seek_dictionary(&value, &exact_match); \ + rowid_t seeked_ordinal = iterator->current_ordinal(); \ + if (!s.is_not_found()) { \ + if (!s.ok()) { \ + return s; \ + } \ + if (exact_match) { \ + roaring::Roaring index; \ + RETURN_IF_ERROR(iterator->read_bitmap(seeked_ordinal, &index)); \ + indices |= index; \ + } \ + } \ + } \ + *result OP indices; \ + return Status::OK(); \ } IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=) @@ -390,4 +385,4 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate) -} //namespace doris +} //namespace doris \ No newline at end of file diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index cca6b44c9e..20231f724a 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -93,8 +93,11 @@ class VectorizedRowBatch; const std::vector& iterators, \ uint32_t num_rows, roaring::Roaring* bitmap) const override; \ void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \ - void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ - void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ + void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ + bool* flags) const override {} \ + void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ + bool* flags) const override {} \ + \ private: \ phmap::flat_hash_set _values; \ }; @@ -102,4 +105,4 @@ class VectorizedRowBatch; IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST) IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST) -} //namespace doris +} //namespace doris \ No newline at end of file diff --git a/be/src/olap/in_stream.cpp b/be/src/olap/in_stream.cpp index 8e8e297ff8..3081498675 100644 --- a/be/src/olap/in_stream.cpp +++ b/be/src/olap/in_stream.cpp @@ -257,8 +257,8 @@ Status InStream::seek(PositionProvider* position) { res = _uncompressed->set_position(uncompressed_bytes); if (!res.ok()) { - LOG(WARNING) << "fail to set position. res= " << res - << ", position=" << (_uncompressed->position() + uncompressed_bytes); + LOG(WARNING) << "fail to set position. res= " << res + << ", position=" << (_uncompressed->position() + uncompressed_bytes); return res; } } else if (_uncompressed != nullptr) { @@ -266,7 +266,8 @@ Status InStream::seek(PositionProvider* position) { res = _uncompressed->set_position(_uncompressed->limit()); if (!res.ok()) { - LOG(WARNING) << "fail to set position.res=" << res << ", position=" << _uncompressed->limit(); + LOG(WARNING) << "fail to set position.res=" << res + << ", position=" << _uncompressed->limit(); return res; } } diff --git a/be/src/olap/in_stream.h b/be/src/olap/in_stream.h index 29bae381e2..6e054c9e1a 100644 --- a/be/src/olap/in_stream.h +++ b/be/src/olap/in_stream.h @@ -189,4 +189,3 @@ inline Status InStream::read(char* buffer, uint64_t* buf_size) { } } // namespace doris - diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index 4609bd0a52..c13b8d894d 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -71,7 +71,8 @@ public: // delete conditions used by column index to filter pages std::vector delete_conditions; - std::shared_ptr delete_condition_predicates = std::make_shared(); + std::shared_ptr delete_condition_predicates = + std::make_shared(); // reader's column predicate, nullptr if not existed // used to fiter rows in row block // TODO(hkp): refactor the column predicate framework @@ -101,9 +102,13 @@ public: // into input batch with Status::OK() returned // If there is no data to read, will return Status::EndOfFile. // If other error happens, other error code will be returned. - virtual Status next_batch(RowBlockV2* block) { return Status::NotSupported("to be implemented"); } + virtual Status next_batch(RowBlockV2* block) { + return Status::NotSupported("to be implemented"); + } - virtual Status next_batch(vectorized::Block* block) { return Status::NotSupported("to be implemented"); } + virtual Status next_batch(vectorized::Block* block) { + return Status::NotSupported("to be implemented"); + } // return schema for this Iterator virtual const Schema& schema() const = 0; diff --git a/be/src/olap/key_coder.h b/be/src/olap/key_coder.h index b83bded315..b6afcf6c9e 100644 --- a/be/src/olap/key_coder.h +++ b/be/src/olap/key_coder.h @@ -273,8 +273,8 @@ public: static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr, MemPool* pool) { CHECK(encoded_key->size <= index_size) - << "encoded_key size is larger than index_size, key_size=" << encoded_key->size - << ", index_size=" << index_size; + << "encoded_key size is larger than index_size, key_size=" << encoded_key->size + << ", index_size=" << index_size; auto copy_size = encoded_key->size; Slice* slice = (Slice*)cell_ptr; slice->data = (char*)pool->allocate(copy_size); diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 4d320c2f90..fdf2548839 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -395,4 +395,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 993f0582ac..7e8d2fa041 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -33,8 +33,7 @@ namespace doris { MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema, const std::vector* slot_descs, TupleDescriptor* tuple_desc, KeysType keys_type, RowsetWriter* rowset_writer, - const std::shared_ptr& parent_tracker, - bool support_vec) + const std::shared_ptr& parent_tracker, bool support_vec) : _tablet_id(tablet_id), _schema(schema), _tablet_schema(tablet_schema), @@ -45,39 +44,41 @@ MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet _table_mem_pool(new MemPool(_mem_tracker.get())), _schema_size(_schema->schema_size()), _rowset_writer(rowset_writer), - _is_first_insertion(true), + _is_first_insertion(true), _agg_functions(schema->num_columns()), - _mem_usage(0){ + _mem_usage(0) { if (support_vec) { _skip_list = nullptr; _vec_row_comparator = std::make_shared(_schema); // TODO: Support ZOrderComparator in the future _vec_skip_list = new VecTable(_vec_row_comparator.get(), _table_mem_pool.get(), - _keys_type == KeysType::DUP_KEYS); - }else{ + _keys_type == KeysType::DUP_KEYS); + } else { _vec_skip_list = nullptr; if (tablet_schema->sort_type() == SortType::ZORDER) { - _row_comparator = - std::make_shared(_schema, tablet_schema->sort_col_num()); + _row_comparator = std::make_shared( + _schema, tablet_schema->sort_col_num()); } else { _row_comparator = std::make_shared(_schema); } _skip_list = new Table(_row_comparator.get(), _table_mem_pool.get(), - _keys_type == KeysType::DUP_KEYS); + _keys_type == KeysType::DUP_KEYS); } } void MemTable::_init_agg_functions(const vectorized::Block* block) { for (uint32_t cid = _schema->num_key_columns(); cid < _schema->num_columns(); ++cid) { FieldAggregationMethod agg_method = _tablet_schema->column(cid).aggregation(); - std::string agg_name = - TabletColumn::get_string_by_aggregation_type(agg_method) + vectorized::AGG_LOAD_SUFFIX; - std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), [](unsigned char c) { return std::tolower(c); }); + std::string agg_name = TabletColumn::get_string_by_aggregation_type(agg_method) + + vectorized::AGG_LOAD_SUFFIX; + std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), + [](unsigned char c) { return std::tolower(c); }); // create aggregate function - vectorized::DataTypes argument_types{block->get_data_type(cid)}; - vectorized::AggregateFunctionPtr function = vectorized::AggregateFunctionSimpleFactory::instance().get( - agg_name, argument_types, {}, argument_types.back()->is_nullable()); + vectorized::DataTypes argument_types {block->get_data_type(cid)}; + vectorized::AggregateFunctionPtr function = + vectorized::AggregateFunctionSimpleFactory::instance().get( + agg_name, argument_types, {}, argument_types.back()->is_nullable()); DCHECK(function != nullptr); _agg_functions[cid] = function; @@ -88,8 +89,7 @@ MemTable::~MemTable() { delete _skip_list; delete _vec_skip_list; - std::for_each(_row_in_blocks.begin(), _row_in_blocks.end(), - std::default_delete()); + std::for_each(_row_in_blocks.begin(), _row_in_blocks.end(), std::default_delete()); _mem_tracker->release(_mem_usage); } @@ -101,10 +101,10 @@ int MemTable::RowCursorComparator::operator()(const char* left, const char* righ return compare_row(lhs_row, rhs_row); } -int MemTable::RowInBlockComparator::operator()(const RowInBlock* left, const RowInBlock* right) const{ - return _pblock->compare_at(left->_row_pos, right->_row_pos, - _schema->num_key_columns(), - *_pblock, -1); +int MemTable::RowInBlockComparator::operator()(const RowInBlock* left, + const RowInBlock* right) const { + return _pblock->compare_at(left->_row_pos, right->_row_pos, _schema->num_key_columns(), + *_pblock, -1); } void MemTable::insert(const vectorized::Block* block, size_t row_pos, size_t num_rows) { @@ -114,7 +114,7 @@ void MemTable::insert(const vectorized::Block* block, size_t row_pos, size_t num _input_mutable_block = vectorized::MutableBlock::build_mutable_block(&cloneBlock); _vec_row_comparator->set_block(&_input_mutable_block); _output_mutable_block = vectorized::MutableBlock::build_mutable_block(&cloneBlock); - if (_keys_type != KeysType::DUP_KEYS){ + if (_keys_type != KeysType::DUP_KEYS) { _init_agg_functions(block); } } @@ -125,10 +125,10 @@ void MemTable::insert(const vectorized::Block* block, size_t row_pos, size_t num _mem_usage += newsize - oldsize; _mem_tracker->consume(newsize - oldsize); - for(int i = 0; i < num_rows; i++){ - _row_in_blocks.emplace_back(new RowInBlock{cursor_in_mutableblock + i}); + for (int i = 0; i < num_rows; i++) { + _row_in_blocks.emplace_back(new RowInBlock {cursor_in_mutableblock + i}); _insert_one_row_from_block(_row_in_blocks.back()); - } + } } void MemTable::_insert_one_row_from_block(RowInBlock* row_in_block) { @@ -142,17 +142,18 @@ void MemTable::_insert_one_row_from_block(RowInBlock* row_in_block) { } bool is_exist = _vec_skip_list->Find(row_in_block, &_vec_hint); - if (is_exist){ + if (is_exist) { _aggregate_two_row_in_block(row_in_block, _vec_hint.curr->key); } else { row_in_block->init_agg_places(_agg_functions, _schema->num_key_columns()); - for (auto cid = _schema->num_key_columns(); cid < _schema->num_columns(); cid++){ + for (auto cid = _schema->num_key_columns(); cid < _schema->num_columns(); cid++) { auto col_ptr = _input_mutable_block.mutable_columns()[cid].get(); auto place = row_in_block->_agg_places[cid]; - _agg_functions[cid]->add(place, const_cast(&col_ptr), + _agg_functions[cid]->add(place, + const_cast(&col_ptr), row_in_block->_row_pos, nullptr); } - + _vec_skip_list->InsertWithHint(row_in_block, is_exist, &_vec_hint); } } @@ -216,12 +217,13 @@ void MemTable::_aggregate_two_row(const ContiguousRow& src_row, TableKey row_in_ } } -void MemTable::_aggregate_two_row_in_block(RowInBlock* new_row, RowInBlock* row_in_skiplist){ +void MemTable::_aggregate_two_row_in_block(RowInBlock* new_row, RowInBlock* row_in_skiplist) { if (_tablet_schema->has_sequence_col()) { auto sequence_idx = _tablet_schema->sequence_col_idx(); - auto res = _input_mutable_block.compare_at(row_in_skiplist->_row_pos, new_row->_row_pos, sequence_idx, _input_mutable_block, -1); + auto res = _input_mutable_block.compare_at(row_in_skiplist->_row_pos, new_row->_row_pos, + sequence_idx, _input_mutable_block, -1); // dst sequence column larger than src, don't need to update - if (res > 0){ + if (res > 0) { return; } } @@ -230,15 +232,14 @@ void MemTable::_aggregate_two_row_in_block(RowInBlock* new_row, RowInBlock* row_ auto place = row_in_skiplist->_agg_places[cid]; auto col_ptr = _input_mutable_block.mutable_columns()[cid].get(); _agg_functions[cid]->add(place, const_cast(&col_ptr), - new_row->_row_pos, nullptr); - } - + new_row->_row_pos, nullptr); + } } vectorized::Block MemTable::_collect_vskiplist_results() { VecTable::Iterator it(_vec_skip_list); vectorized::Block in_block = _input_mutable_block.to_block(); // TODO: should try to insert data by column, not by row. to opt the the code - if (_keys_type == KeysType::DUP_KEYS){ + if (_keys_type == KeysType::DUP_KEYS) { for (it.SeekToFirst(); it.Valid(); it.Next()) { _output_mutable_block.add_row(&in_block, it.key()->_row_pos); } @@ -247,12 +248,14 @@ vectorized::Block MemTable::_collect_vskiplist_results() { auto& block_data = in_block.get_columns_with_type_and_name(); // move key columns for (size_t i = 0; i < _schema->num_key_columns(); ++i) { - _output_mutable_block.get_column_by_position(i)->insert_from(*block_data[i].column.get(), it.key()->_row_pos); + _output_mutable_block.get_column_by_position(i)->insert_from( + *block_data[i].column.get(), it.key()->_row_pos); } // get value columns from agg_places for (size_t i = _schema->num_key_columns(); i < _schema->num_columns(); ++i) { auto function = _agg_functions[i]; - function->insert_result_into(it.key()->_agg_places[i] , *(_output_mutable_block.get_column_by_position(i))); + function->insert_result_into(it.key()->_agg_places[i], + *(_output_mutable_block.get_column_by_position(i))); function->destroy(it.key()->_agg_places[i]); } } diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index b034368f63..ed3cbc5fa3 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -43,18 +43,17 @@ public: MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema, const std::vector* slot_descs, TupleDescriptor* tuple_desc, KeysType keys_type, RowsetWriter* rowset_writer, - const std::shared_ptr& parent_tracker, - bool support_vec = false); + const std::shared_ptr& parent_tracker, bool support_vec = false); ~MemTable(); int64_t tablet_id() const { return _tablet_id; } size_t memory_usage() const { return _mem_tracker->consumption(); } std::shared_ptr& mem_tracker() { return _mem_tracker; } - + void insert(const Tuple* tuple); // insert tuple from (row_pos) to (row_pos+num_rows) void insert(const vectorized::Block* block, size_t row_pos, size_t num_rows); - + /// Flush Status flush(); Status close(); @@ -80,9 +79,9 @@ private: explicit RowInBlock(size_t i) : _row_pos(i) {} void init_agg_places(std::vector& agg_functions, - int key_column_count) { + int key_column_count) { _agg_places.resize(agg_functions.size()); - for(int cid = 0; cid < agg_functions.size(); cid++) { + for (int cid = 0; cid < agg_functions.size(); cid++) { if (cid < key_column_count) { _agg_places[cid] = nullptr; } else { @@ -96,7 +95,7 @@ private: ~RowInBlock() { for (auto agg_place : _agg_places) { - delete [] agg_place; + delete[] agg_place; } } }; @@ -107,11 +106,12 @@ private: // call set_block before operator(). // only first time insert block to create _input_mutable_block, // so can not Comparator of construct to set pblock - void set_block(vectorized::MutableBlock* pblock) {_pblock = pblock;} + void set_block(vectorized::MutableBlock* pblock) { _pblock = pblock; } int operator()(const RowInBlock* left, const RowInBlock* right) const; + private: const Schema* _schema; - vectorized::MutableBlock* _pblock;// 对应Memtable::_input_mutable_block + vectorized::MutableBlock* _pblock; // 对应Memtable::_input_mutable_block }; private: @@ -137,7 +137,6 @@ public: Table::Iterator _it; }; - private: void _tuple_to_row(const Tuple* tuple, ContiguousRow* row, MemPool* mem_pool); void _aggregate_two_row(const ContiguousRow& new_row, TableKey row_in_skiplist); @@ -154,7 +153,7 @@ private: // TODO: change to unique_ptr of comparator std::shared_ptr _row_comparator; - + std::shared_ptr _vec_row_comparator; std::shared_ptr _mem_tracker; @@ -187,7 +186,7 @@ private: // in unique or aggragate key model. int64_t _rows = 0; - //for vectorized + //for vectorized vectorized::MutableBlock _input_mutable_block; vectorized::MutableBlock _output_mutable_block; vectorized::Block _collect_vskiplist_results(); @@ -199,7 +198,6 @@ private: size_t _mem_usage; }; // class MemTable - inline std::ostream& operator<<(std::ostream& os, const MemTable& table) { os << "MemTable(addr=" << &table << ", tablet=" << table.tablet_id() << ", mem=" << table.memory_usage(); @@ -207,4 +205,3 @@ inline std::ostream& operator<<(std::ostream& os, const MemTable& table) { } } // namespace doris - diff --git a/be/src/olap/memtable_flush_executor.cpp b/be/src/olap/memtable_flush_executor.cpp index 6031304984..1b25c07197 100644 --- a/be/src/olap/memtable_flush_executor.cpp +++ b/be/src/olap/memtable_flush_executor.cpp @@ -29,8 +29,7 @@ namespace doris { std::ostream& operator<<(std::ostream& os, const FlushStatistic& stat) { os << "(flush time(ms)=" << stat.flush_time_ns / NANOS_PER_MILLIS << ", flush wait time(ms)=" << stat.flush_wait_time_ns / NANOS_PER_MILLIS - << ", flush count=" << stat.flush_count - << ", flush bytes: " << stat.flush_size_bytes + << ", flush count=" << stat.flush_count << ", flush bytes: " << stat.flush_size_bytes << ", flush disk bytes: " << stat.flush_disk_size_bytes << ")"; return os; } @@ -46,7 +45,8 @@ Status FlushToken::submit(const std::shared_ptr& memtable) { return Status::OLAPInternalError(s); } int64_t submit_task_time = MonotonicNanos(); - _flush_token->submit_func(std::bind(&FlushToken::_flush_memtable, this, memtable, submit_task_time)); + _flush_token->submit_func( + std::bind(&FlushToken::_flush_memtable, this, memtable, submit_task_time)); return Status::OK(); } @@ -82,9 +82,8 @@ void FlushToken::_flush_memtable(std::shared_ptr memtable, int64_t sub } VLOG_CRITICAL << "flush memtable cost: " << timer.elapsed_time() - << ", count: " << _stats.flush_count - << ", mem size: " << memtable->memory_usage() - << ", disk size: " << memtable->flush_size(); + << ", count: " << _stats.flush_count << ", mem size: " << memtable->memory_usage() + << ", disk size: " << memtable->flush_size(); _stats.flush_time_ns += timer.elapsed_time(); _stats.flush_count++; _stats.flush_size_bytes += memtable->memory_usage(); @@ -109,24 +108,27 @@ void MemTableFlushExecutor::init(const std::vector& data_dirs) { } // NOTE: we use SERIAL mode here to ensure all mem-tables from one tablet are flushed in order. -Status MemTableFlushExecutor::create_flush_token( - std::unique_ptr* flush_token, - RowsetTypePB rowset_type, bool is_high_priority) { +Status MemTableFlushExecutor::create_flush_token(std::unique_ptr* flush_token, + RowsetTypePB rowset_type, bool is_high_priority) { if (!is_high_priority) { if (rowset_type == BETA_ROWSET) { // beta rowset can be flush in CONCURRENT, because each memtable using a new segment writer. - flush_token->reset(new FlushToken(_flush_pool->new_token(ThreadPool::ExecutionMode::CONCURRENT))); + flush_token->reset( + new FlushToken(_flush_pool->new_token(ThreadPool::ExecutionMode::CONCURRENT))); } else { // alpha rowset do not support flush in CONCURRENT. - flush_token->reset(new FlushToken(_flush_pool->new_token(ThreadPool::ExecutionMode::SERIAL))); + flush_token->reset( + new FlushToken(_flush_pool->new_token(ThreadPool::ExecutionMode::SERIAL))); } } else { if (rowset_type == BETA_ROWSET) { // beta rowset can be flush in CONCURRENT, because each memtable using a new segment writer. - flush_token->reset(new FlushToken(_high_prio_flush_pool->new_token(ThreadPool::ExecutionMode::CONCURRENT))); + flush_token->reset(new FlushToken( + _high_prio_flush_pool->new_token(ThreadPool::ExecutionMode::CONCURRENT))); } else { // alpha rowset do not support flush in CONCURRENT. - flush_token->reset(new FlushToken(_high_prio_flush_pool->new_token(ThreadPool::ExecutionMode::SERIAL))); + flush_token->reset(new FlushToken( + _high_prio_flush_pool->new_token(ThreadPool::ExecutionMode::SERIAL))); } } return Status::OK(); diff --git a/be/src/olap/memtable_flush_executor.h b/be/src/olap/memtable_flush_executor.h index 274f583c43..36d5c21be8 100644 --- a/be/src/olap/memtable_flush_executor.h +++ b/be/src/olap/memtable_flush_executor.h @@ -101,9 +101,8 @@ public: // because it needs path hash of each data dir. void init(const std::vector& data_dirs); - Status create_flush_token( - std::unique_ptr* flush_token, - RowsetTypePB rowset_type, bool is_high_priority); + Status create_flush_token(std::unique_ptr* flush_token, RowsetTypePB rowset_type, + bool is_high_priority); private: std::unique_ptr _flush_pool; diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index b7d50babb6..42759f6b6f 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -30,9 +30,8 @@ namespace doris { Status Merger::merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, - const std::vector& src_rowset_readers, - RowsetWriter* dst_rowset_writer, - Merger::Statistics* stats_output) { + const std::vector& src_rowset_readers, + RowsetWriter* dst_rowset_writer, Merger::Statistics* stats_output) { TRACE_COUNTER_SCOPE_LATENCY_US("merge_rowsets_latency_us"); TupleReader reader; @@ -89,8 +88,8 @@ Status Merger::merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, } Status Merger::vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, - const std::vector& src_rowset_readers, - RowsetWriter* dst_rowset_writer, Statistics* stats_output) { + const std::vector& src_rowset_readers, + RowsetWriter* dst_rowset_writer, Statistics* stats_output) { TRACE_COUNTER_SCOPE_LATENCY_US("merge_rowsets_latency_us"); vectorized::BlockReader reader; @@ -105,7 +104,7 @@ Status Merger::vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, std::iota(reader_params.return_columns.begin(), reader_params.return_columns.end(), 0); reader_params.origin_return_columns = &reader_params.return_columns; RETURN_NOT_OK(reader.init(reader_params)); - + vectorized::Block block = schema.create_block(reader_params.return_columns); size_t output_rows = 0; while (true) { diff --git a/be/src/olap/merger.h b/be/src/olap/merger.h index e8ae558505..5f258779fb 100644 --- a/be/src/olap/merger.h +++ b/be/src/olap/merger.h @@ -36,13 +36,12 @@ public: // return OLAP_SUCCESS and set statistics into `*stats_output`. // return others on error static Status merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, - const std::vector& src_rowset_readers, - RowsetWriter* dst_rowset_writer, Statistics* stats_output); + const std::vector& src_rowset_readers, + RowsetWriter* dst_rowset_writer, Statistics* stats_output); static Status vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, - const std::vector& src_rowset_readers, - RowsetWriter* dst_rowset_writer, Statistics* stats_output); + const std::vector& src_rowset_readers, + RowsetWriter* dst_rowset_writer, Statistics* stats_output); }; } // namespace doris - diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h index cd724f9eed..ed78066941 100644 --- a/be/src/olap/null_predicate.h +++ b/be/src/olap/null_predicate.h @@ -57,4 +57,3 @@ private: }; } //namespace doris - diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index fab987bb06..3afa23f983 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -388,4 +388,3 @@ struct RowsetId { }; } // namespace doris - diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h index a86afaf47a..1282fae0f1 100644 --- a/be/src/olap/olap_cond.h +++ b/be/src/olap/olap_cond.h @@ -199,4 +199,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index d24695607b..6fb4b6cb21 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -138,20 +138,20 @@ const std::string ROWSET_ID_PREFIX = "s_"; #endif #ifndef RETURN_NOT_OK -#define RETURN_NOT_OK(s) \ - do { \ - Status _s = (s); \ +#define RETURN_NOT_OK(s) \ + do { \ + Status _s = (s); \ if (OLAP_UNLIKELY(!_s.ok())) { \ - return _s; \ - } \ + return _s; \ + } \ } while (0); #endif #ifndef RETURN_NOT_OK_LOG #define RETURN_NOT_OK_LOG(s, msg) \ do { \ - Status _s = (s); \ - if (OLAP_UNLIKELY(!_s)) { \ + Status _s = (s); \ + if (OLAP_UNLIKELY(!_s)) { \ LOG(WARNING) << (msg) << "[res=" << _s << "]"; \ return _s; \ } \ diff --git a/be/src/olap/olap_index.cpp b/be/src/olap/olap_index.cpp index 95a4d2a700..22797b635e 100644 --- a/be/src/olap/olap_index.cpp +++ b/be/src/olap/olap_index.cpp @@ -52,7 +52,7 @@ MemIndex::~MemIndex() { } Status MemIndex::load_segment(const char* file, size_t* current_num_rows_per_row_block, - bool use_cache) { + bool use_cache) { Status res = Status::OK(); SegmentMetaInfo meta; @@ -315,7 +315,7 @@ Status MemIndex::load_segment(const char* file, size_t* current_num_rows_per_row } Status MemIndex::init(size_t short_key_len, size_t new_short_key_len, size_t short_key_num, - std::vector* short_key_columns) { + std::vector* short_key_columns) { if (short_key_columns == nullptr) { LOG(WARNING) << "fail to init MemIndex, nullptr short key columns."; return Status::OLAPInternalError(OLAP_ERR_INDEX_LOAD_ERROR); @@ -469,8 +469,7 @@ Status MemIndex::get_entry(const OLAPIndexOffset& pos, EntrySlice* slice) const return Status::OK(); } -Status MemIndex::get_row_block_position(const OLAPIndexOffset& pos, - RowBlockPosition* rbp) const { +Status MemIndex::get_row_block_position(const OLAPIndexOffset& pos, RowBlockPosition* rbp) const { if (zero_num_rows()) { return Status::OLAPInternalError(OLAP_ERR_INDEX_EOF); } diff --git a/be/src/olap/olap_index.h b/be/src/olap/olap_index.h index 191d4e151f..d7a3deaa33 100644 --- a/be/src/olap/olap_index.h +++ b/be/src/olap/olap_index.h @@ -159,11 +159,11 @@ public: // 初始化MemIndex, 传入short_key的总长度和对应的Field数组 Status init(size_t short_key_len, size_t new_short_key_len, size_t short_key_num, - std::vector* short_key_columns); + std::vector* short_key_columns); // 加载一个segment到内存 Status load_segment(const char* file, size_t* current_num_rows_per_row_block, - bool use_cache = true); + bool use_cache = true); // Return the IndexOffset of the first element, physically, it's (0, 0) const OLAPIndexOffset begin() const { @@ -378,4 +378,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/olap_meta.cpp b/be/src/olap/olap_meta.cpp index ff6ca5325a..963822b480 100644 --- a/be/src/olap/olap_meta.cpp +++ b/be/src/olap/olap_meta.cpp @@ -82,8 +82,7 @@ Status OlapMeta::init() { return Status::OK(); } -Status OlapMeta::get(const int column_family_index, const std::string& key, - std::string* value) { +Status OlapMeta::get(const int column_family_index, const std::string& key, std::string* value) { DorisMetrics::instance()->meta_read_request_total->increment(1); rocksdb::ColumnFamilyHandle* handle = _handles[column_family_index]; int64_t duration_ns = 0; @@ -103,7 +102,7 @@ Status OlapMeta::get(const int column_family_index, const std::string& key, } bool OlapMeta::key_may_exist(const int column_family_index, const std::string& key, - std::string* value) { + std::string* value) { DorisMetrics::instance()->meta_read_request_total->increment(1); rocksdb::ColumnFamilyHandle* handle = _handles[column_family_index]; int64_t duration_ns = 0; @@ -113,12 +112,12 @@ bool OlapMeta::key_may_exist(const int column_family_index, const std::string& k is_exist = _db->KeyMayExist(ReadOptions(), handle, rocksdb::Slice(key), value); } DorisMetrics::instance()->meta_read_request_duration_us->increment(duration_ns / 1000); - + return is_exist; } Status OlapMeta::put(const int column_family_index, const std::string& key, - const std::string& value) { + const std::string& value) { DorisMetrics::instance()->meta_write_request_total->increment(1); rocksdb::ColumnFamilyHandle* handle = _handles[column_family_index]; int64_t duration_ns = 0; @@ -156,9 +155,8 @@ Status OlapMeta::remove(const int column_family_index, const std::string& key) { return Status::OK(); } -Status OlapMeta::iterate( - const int column_family_index, const std::string& prefix, - std::function const& func) { +Status OlapMeta::iterate(const int column_family_index, const std::string& prefix, + std::function const& func) { rocksdb::ColumnFamilyHandle* handle = _handles[column_family_index]; std::unique_ptr it(_db->NewIterator(ReadOptions(), handle)); if (prefix == "") { diff --git a/be/src/olap/olap_meta.h b/be/src/olap/olap_meta.h index 03e542f8d6..d2557415b3 100644 --- a/be/src/olap/olap_meta.h +++ b/be/src/olap/olap_meta.h @@ -43,7 +43,7 @@ public: Status remove(const int column_family_index, const std::string& key); Status iterate(const int column_family_index, const std::string& prefix, - std::function const& func); + std::function const& func); std::string get_root_path(); @@ -54,4 +54,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index 836ad1d486..370ad57d6c 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -182,7 +182,7 @@ void StorageEngine::_garbage_sweeper_thread_callback() { Status res = start_trash_sweep(&usage); if (!res.ok()) { LOG(WARNING) << "one or more errors occur when sweep trash." - << "see previous message for detail. err code=" << res; + << "see previous message for detail. err code=" << res; // do nothing. continue next loop. } } @@ -589,10 +589,12 @@ Status StorageEngine::_submit_compaction_task(TabletSharedPtr tablet, tablet->reset_compaction(compaction_type); _pop_tablet_from_submitted_compaction(tablet, compaction_type); if (!st.ok()) { - return Status::InternalError(strings::Substitute( - "failed to prepare compaction task and calculate permits, tablet_id=$0, compaction_type=$1, " - "permit=$2, current_permit=$3, status=$4", - tablet->tablet_id(), compaction_type, permits, _permit_limiter.usage(), st.get_error_msg())); + return Status::InternalError( + strings::Substitute("failed to prepare compaction task and calculate permits, " + "tablet_id=$0, compaction_type=$1, " + "permit=$2, current_permit=$3, status=$4", + tablet->tablet_id(), compaction_type, permits, + _permit_limiter.usage(), st.get_error_msg())); } return st; } diff --git a/be/src/olap/options.h b/be/src/olap/options.h index 8a21c46829..41ab8ff3f0 100644 --- a/be/src/olap/options.h +++ b/be/src/olap/options.h @@ -46,6 +46,6 @@ struct EngineOptions { // list paths that tablet will be put into. std::vector store_paths; // BE's UUID. It will be reset every time BE restarts. - UniqueId backend_uid{0, 0}; + UniqueId backend_uid {0, 0}; }; } // namespace doris diff --git a/be/src/olap/out_stream.cpp b/be/src/olap/out_stream.cpp index b5d19b841a..1240602a10 100644 --- a/be/src/olap/out_stream.cpp +++ b/be/src/olap/out_stream.cpp @@ -106,7 +106,7 @@ Status OutStream::_create_new_input_buffer() { } Status OutStream::_write_head(StorageByteBuffer* buf, uint64_t position, - StreamHead::StreamType type, uint32_t length) { + StreamHead::StreamType type, uint32_t length) { if (buf->limit() < sizeof(StreamHead) + length) { return Status::OLAPInternalError(OLAP_ERR_BUFFER_OVERFLOW); } @@ -119,7 +119,7 @@ Status OutStream::_write_head(StorageByteBuffer* buf, uint64_t position, } Status OutStream::_compress(StorageByteBuffer* input, StorageByteBuffer* output, - StorageByteBuffer* overflow, bool* smaller) { + StorageByteBuffer* overflow, bool* smaller) { Status res = Status::OK(); res = _compressor(input, overflow, smaller); diff --git a/be/src/olap/out_stream.h b/be/src/olap/out_stream.h index b1954ce6cf..1753431c27 100644 --- a/be/src/olap/out_stream.h +++ b/be/src/olap/out_stream.h @@ -106,22 +106,22 @@ public: private: Status _create_new_input_buffer(); Status _write_head(StorageByteBuffer* buf, uint64_t position, StreamHead::StreamType type, - uint32_t length); + uint32_t length); Status _spill(); Status _compress(StorageByteBuffer* input, StorageByteBuffer* output, - StorageByteBuffer* overflow, bool* smaller); + StorageByteBuffer* overflow, bool* smaller); void _output_uncompress(); void _output_compressed(); Status _make_sure_output_buffer(); - uint32_t _buffer_size; // Compressed block size - Compressor _compressor; // Compression function, if NULL means no compression + uint32_t _buffer_size; // Compressed block size + Compressor _compressor; // Compression function, if NULL means no compression std::vector _output_buffers; // Buffer all output bool _is_suppressed; // Whether the stream is terminated StorageByteBuffer* _current; // Cache uncompressed data StorageByteBuffer* _compressed; // Bytes to be output to output_buffers StorageByteBuffer* _overflow; // Bytes that can't fit in _output - uint64_t _spilled_bytes; // The number of bytes that have been output to output + uint64_t _spilled_bytes; // The number of bytes that have been output to output DISALLOW_COPY_AND_ASSIGN(OutStream); }; @@ -167,4 +167,3 @@ protected: */ } // namespace doris - diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index 16ebfe4b12..13da3018d4 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -52,19 +52,19 @@ public: // Load local data file into specified tablet. Status process_streaming_ingestion(TabletSharedPtr tablet, const TPushReq& request, - PushType push_type, - std::vector* tablet_info_vec); + PushType push_type, + std::vector* tablet_info_vec); int64_t write_bytes() const { return _write_bytes; } int64_t write_rows() const { return _write_rows; } private: Status _convert_v2(TabletSharedPtr cur_tablet, TabletSharedPtr new_tablet_vec, - RowsetSharedPtr* cur_rowset, RowsetSharedPtr* new_rowset); + RowsetSharedPtr* cur_rowset, RowsetSharedPtr* new_rowset); // Convert local data file to internal formatted delta, // return new delta's SegmentGroup Status _convert(TabletSharedPtr cur_tablet, TabletSharedPtr new_tablet_vec, - RowsetSharedPtr* cur_rowset, RowsetSharedPtr* new_rowset); + RowsetSharedPtr* cur_rowset, RowsetSharedPtr* new_rowset); // Only for debug std::string _debug_version_list(const Versions& versions) const; @@ -73,8 +73,8 @@ private: std::vector* tablet_info_vec); Status _do_streaming_ingestion(TabletSharedPtr tablet, const TPushReq& request, - PushType push_type, vector* tablet_vars, - std::vector* tablet_info_vec); + PushType push_type, vector* tablet_vars, + std::vector* tablet_info_vec); private: // mainly tablet_id, version and delta file path @@ -187,7 +187,7 @@ public: ~PushBrokerReader() {} Status init(const Schema* schema, const TBrokerScanRange& t_scan_range, - const TDescriptorTable& t_desc_tbl); + const TDescriptorTable& t_desc_tbl); Status next(ContiguousRow* row); void print_profile(); @@ -201,7 +201,7 @@ public: private: Status fill_field_row(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, - FieldType type); + FieldType type); bool _ready; bool _eof; bool _fill_tuple; @@ -218,4 +218,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 90b47bce63..d173ca5697 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -145,7 +145,7 @@ bool TabletReader::_optimize_for_single_rowset( } Status TabletReader::_capture_rs_readers(const ReaderParams& read_params, - std::vector* valid_rs_readers) { + std::vector* valid_rs_readers) { const std::vector* rs_readers = &read_params.rs_readers; if (rs_readers->empty()) { LOG(WARNING) << "fail to acquire data sources. tablet=" << _tablet->full_name(); @@ -419,8 +419,8 @@ Status TabletReader::_init_keys_param(const ReaderParams& read_params) { return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); } - Status res = _keys_param.end_keys[i].init_scan_key( - _tablet->tablet_schema(), read_params.end_key[i].values(), schema); + Status res = _keys_param.end_keys[i].init_scan_key(_tablet->tablet_schema(), + read_params.end_key[i].values(), schema); if (!res.ok()) { LOG(WARNING) << "fail to init row cursor. res = " << res; return res; diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 88b5cafc42..07fe87c81a 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -62,6 +62,7 @@ class TabletReader { bool start_key_include = false; bool end_key_include = false; }; + public: // Params for Reader, // mainly include tablet, data version and fetch range. @@ -114,7 +115,7 @@ public: // Return OLAP_SUCCESS and set `*eof` to true when no more rows can be read. // Return others when unexpected error happens. virtual Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof) = 0; + ObjectPool* agg_pool, bool* eof) = 0; // Read next block with aggregation. // Return OLAP_SUCCESS and set `*eof` to false when next block is read @@ -122,7 +123,7 @@ public: // Return others when unexpected error happens. // TODO: Rethink here we still need mem_pool and agg_pool? virtual Status next_block_with_aggregation(vectorized::Block* block, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof) { + ObjectPool* agg_pool, bool* eof) { return Status::OLAPInternalError(OLAP_ERR_READER_INITIALIZE_ERROR); } @@ -133,9 +134,7 @@ public: _stats.rows_vec_del_cond_filtered; } - void set_batch_size(int batch_size) { - _batch_size = batch_size; - } + void set_batch_size(int batch_size) { _batch_size = batch_size; } const OlapReaderStatistics& stats() const { return _stats; } OlapReaderStatistics* mutable_stats() { return &_stats; } @@ -148,7 +147,7 @@ protected: Status _init_params(const ReaderParams& read_params); Status _capture_rs_readers(const ReaderParams& read_params, - std::vector* valid_rs_readers); + std::vector* valid_rs_readers); bool _optimize_for_single_rowset(const std::vector& rs_readers); @@ -229,4 +228,3 @@ protected: }; } // namespace doris - diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h index 0e64453e62..f40f2fd93b 100644 --- a/be/src/olap/row_block.h +++ b/be/src/olap/row_block.h @@ -140,4 +140,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index a29b74bca0..12e72998dd 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -461,7 +461,8 @@ Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, size_t column_string->insert_data(slice->data, slice->size); } else { return Status::NotSupported( - "Not support string len over than `string_type_length_soft_limit_bytes` in vec engine."); + "Not support string len over than " + "`string_type_length_soft_limit_bytes` in vec engine."); } } else { column_string->insert_default(); diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index b451355fc2..3a8cf740c0 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -67,19 +67,19 @@ Status RowCursor::_init(const std::vector& columns) { } Status RowCursor::_init(const std::shared_ptr& shared_schema, - const std::vector& columns) { + const std::vector& columns) { _schema.reset(new Schema(*shared_schema.get())); return _init(columns); } Status RowCursor::_init(const std::vector& schema, - const std::vector& columns) { + const std::vector& columns) { _schema.reset(new Schema(schema, columns)); return _init(columns); } Status RowCursor::_init_scan_key(const TabletSchema& schema, - const std::vector& scan_keys) { + const std::vector& scan_keys) { // NOTE: cid equal with column index // Hyperloglog cannot be key, no need to handle it _variable_len = 0; @@ -173,7 +173,7 @@ Status RowCursor::init(const TabletSchema& schema, const std::vector& } Status RowCursor::init_scan_key(const TabletSchema& schema, - const std::vector& scan_keys) { + const std::vector& scan_keys) { size_t scan_key_size = scan_keys.size(); if (scan_key_size > schema.num_columns()) { LOG(WARNING) @@ -192,8 +192,8 @@ Status RowCursor::init_scan_key(const TabletSchema& schema, } Status RowCursor::init_scan_key(const TabletSchema& schema, - const std::vector& scan_keys, - const std::shared_ptr& shared_schema) { + const std::vector& scan_keys, + const std::shared_ptr& shared_schema) { size_t scan_key_size = scan_keys.size(); std::vector columns; diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h index 2474393f41..b60a11ddce 100644 --- a/be/src/olap/row_cursor.h +++ b/be/src/olap/row_cursor.h @@ -55,9 +55,8 @@ public: // Initialize with the size of the key, currently only used when splitting the range of key Status init_scan_key(const TabletSchema& schema, const std::vector& keys); - Status init_scan_key(const TabletSchema& schema, - const std::vector& keys, - const std::shared_ptr& shared_schema); + Status init_scan_key(const TabletSchema& schema, const std::vector& keys, + const std::shared_ptr& shared_schema); //allocate memory for string type, which include char, varchar, hyperloglog Status allocate_memory_for_string_type(const TabletSchema& schema); @@ -86,7 +85,7 @@ public: } // convert and deep copy field content Status convert_from(size_t index, const char* src, const TypeInfo* src_type, - MemPool* mem_pool) { + MemPool* mem_pool) { char* dest = cell_ptr(index); return column_schema(index)->convert_from(dest, src, src_type, mem_pool); } @@ -130,9 +129,7 @@ public: bool is_null(size_t index) const { return *reinterpret_cast(nullable_cell_ptr(index)); } - void set_null(size_t index) const { - *reinterpret_cast(nullable_cell_ptr(index)) = true; - } + void set_null(size_t index) const { *reinterpret_cast(nullable_cell_ptr(index)) = true; } void set_not_null(size_t index) const { *reinterpret_cast(nullable_cell_ptr(index)) = false; @@ -149,7 +146,7 @@ public: private: Status _init(const std::vector& columns); Status _init(const std::shared_ptr& shared_schema, - const std::vector& columns); + const std::vector& columns); // common init function Status _init(const std::vector& schema, const std::vector& columns); Status _alloc_buf(); @@ -170,4 +167,3 @@ private: DISALLOW_COPY_AND_ASSIGN(RowCursor); }; } // namespace doris - diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp index 62bac2929a..4815ddf0a9 100644 --- a/be/src/olap/rowset/alpha_rowset.cpp +++ b/be/src/olap/rowset/alpha_rowset.cpp @@ -56,8 +56,9 @@ Status AlphaRowset::create_reader(std::shared_ptr* result) { } Status AlphaRowset::remove() { - VLOG_NOTICE << "begin to remove files in rowset " << unique_id() << ", version:" << start_version() - << "-" << end_version() << ", tabletid:" << _rowset_meta->tablet_id(); + VLOG_NOTICE << "begin to remove files in rowset " << unique_id() + << ", version:" << start_version() << "-" << end_version() + << ", tabletid:" << _rowset_meta->tablet_id(); for (auto segment_group : _segment_groups) { bool ret = segment_group->delete_all_files(); if (!ret) { @@ -114,7 +115,7 @@ Status AlphaRowset::copy_files_to(const std::string& dir, const RowsetId& new_ro } Status AlphaRowset::convert_from_old_files(const std::string& snapshot_path, - std::vector* success_files) { + std::vector* success_files) { for (auto& segment_group : _segment_groups) { Status status = segment_group->convert_from_old_files(snapshot_path, success_files); if (!status.ok()) { @@ -127,7 +128,7 @@ Status AlphaRowset::convert_from_old_files(const std::string& snapshot_path, } Status AlphaRowset::convert_to_old_files(const std::string& snapshot_path, - std::vector* success_files) { + std::vector* success_files) { for (auto& segment_group : _segment_groups) { Status status = segment_group->convert_to_old_files(snapshot_path, success_files); if (!status.ok()) { @@ -152,14 +153,15 @@ Status AlphaRowset::remove_old_files(std::vector* files_to_remove) } Status AlphaRowset::split_range(const RowCursor& start_key, const RowCursor& end_key, - uint64_t request_block_row_count, size_t key_num, - std::vector* ranges) { + uint64_t request_block_row_count, size_t key_num, + std::vector* ranges) { if (key_num > _schema->num_short_key_columns()) { // should not happen // But since aloha rowset is deprecated in future and it will not fail the query, // just use VLOG to avoid too many warning logs. - VLOG_NOTICE << "key num " << key_num << " should less than or equal to short key column number: " - << _schema->num_short_key_columns(); + VLOG_NOTICE << "key num " << key_num + << " should less than or equal to short key column number: " + << _schema->num_short_key_columns(); return Status::OLAPInternalError(OLAP_ERR_INVALID_SCHEMA); } EntrySlice entry; @@ -171,8 +173,8 @@ Status AlphaRowset::split_range(const RowCursor& start_key, const RowCursor& end if (largest_segment_group == nullptr || largest_segment_group->current_num_rows_per_row_block() == 0) { VLOG_NOTICE << "failed to get largest_segment_group. is null: " - << (largest_segment_group == nullptr) << ". version: " << start_version() - << "-" << end_version() << ". tablet: " << rowset_meta()->tablet_id(); + << (largest_segment_group == nullptr) << ". version: " << start_version() << "-" + << end_version() << ". tablet: " << rowset_meta()->tablet_id(); ranges->emplace_back(start_key.to_tuple()); ranges->emplace_back(end_key.to_tuple()); return Status::OK(); @@ -282,12 +284,14 @@ bool AlphaRowset::check_file_exist() { for (int i = 0; i < segment_group->num_segments(); ++i) { std::string data_path = segment_group->construct_data_file_path(i); if (!FileUtils::check_exist(data_path)) { - LOG(WARNING) << "data file not existed: " << data_path << " for rowset_id: " << rowset_id(); + LOG(WARNING) << "data file not existed: " << data_path + << " for rowset_id: " << rowset_id(); return false; } std::string index_path = segment_group->construct_index_file_path(i); if (!FileUtils::check_exist(index_path)) { - LOG(WARNING) << "index file not existed: " << index_path << " for rowset_id: " << rowset_id(); + LOG(WARNING) << "index file not existed: " << index_path + << " for rowset_id: " << rowset_id(); return false; } } @@ -304,13 +308,14 @@ Status AlphaRowset::init() { std::shared_ptr segment_group; if (_is_pending) { segment_group.reset(new SegmentGroup( - _rowset_meta->tablet_id(), _rowset_meta->rowset_id(), _schema, _rowset_path_desc.filepath, - false, segment_group_meta.segment_group_id(), segment_group_meta.num_segments(), - true, _rowset_meta->partition_id(), _rowset_meta->txn_id())); + _rowset_meta->tablet_id(), _rowset_meta->rowset_id(), _schema, + _rowset_path_desc.filepath, false, segment_group_meta.segment_group_id(), + segment_group_meta.num_segments(), true, _rowset_meta->partition_id(), + _rowset_meta->txn_id())); } else { segment_group.reset(new SegmentGroup( - _rowset_meta->tablet_id(), _rowset_meta->rowset_id(), _schema, _rowset_path_desc.filepath, - _rowset_meta->version(), false, + _rowset_meta->tablet_id(), _rowset_meta->rowset_id(), _schema, + _rowset_path_desc.filepath, _rowset_meta->version(), false, segment_group_meta.segment_group_id(), segment_group_meta.num_segments())); } if (segment_group == nullptr) { @@ -345,7 +350,8 @@ Status AlphaRowset::init() { // table value column, so when first start the two number is not the same, // it causes start failed. When `expect_zone_maps_num > zone_maps_size` it may be the first start after upgrade if (expect_zone_maps_num > zone_maps_size) { - VLOG_CRITICAL << "tablet: " << _rowset_meta->tablet_id() << " expect zone map size is " + VLOG_CRITICAL + << "tablet: " << _rowset_meta->tablet_id() << " expect zone map size is " << expect_zone_maps_num << ", actual num is " << zone_maps_size << ". If this is not the first start after upgrade, please pay attention!"; } diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h index 73651f920f..9f4c838723 100644 --- a/be/src/olap/rowset/alpha_rowset.h +++ b/be/src/olap/rowset/alpha_rowset.h @@ -42,8 +42,8 @@ public: Status create_reader(std::shared_ptr* result) override; Status split_range(const RowCursor& start_key, const RowCursor& end_key, - uint64_t request_block_row_count, size_t key_num, - std::vector* ranges) override; + uint64_t request_block_row_count, size_t key_num, + std::vector* ranges) override; Status remove() override; @@ -52,10 +52,10 @@ public: Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) override; Status convert_from_old_files(const std::string& snapshot_path, - std::vector* success_files); + std::vector* success_files); Status convert_to_old_files(const std::string& snapshot_path, - std::vector* success_files); + std::vector* success_files); Status remove_old_files(std::vector* files_to_remove) override; diff --git a/be/src/olap/rowset/alpha_rowset_reader.cpp b/be/src/olap/rowset/alpha_rowset_reader.cpp index 57aa9c0758..3b0d7472ee 100644 --- a/be/src/olap/rowset/alpha_rowset_reader.cpp +++ b/be/src/olap/rowset/alpha_rowset_reader.cpp @@ -355,7 +355,7 @@ Status AlphaRowsetReader::_init_merge_ctxs(RowsetReaderContext* read_context) { if (new_column_data->rowset_pruning_filter()) { _stats->rows_stats_filtered += new_column_data->num_rows(); VLOG_NOTICE << "filter segment group in query in condition. version=" - << new_column_data->version(); + << new_column_data->version(); continue; } } @@ -363,15 +363,16 @@ Status AlphaRowsetReader::_init_merge_ctxs(RowsetReaderContext* read_context) { int ret = new_column_data->delete_pruning_filter(); if (ret == DEL_SATISFIED) { _stats->rows_del_filtered += new_column_data->num_rows(); - VLOG_NOTICE << "filter segment group in delete predicate:" << new_column_data->version(); + VLOG_NOTICE << "filter segment group in delete predicate:" + << new_column_data->version(); continue; } else if (ret == DEL_PARTIAL_SATISFIED) { VLOG_NOTICE << "filter segment group partially in delete predicate:" - << new_column_data->version(); + << new_column_data->version(); new_column_data->set_delete_status(DEL_PARTIAL_SATISFIED); } else { VLOG_NOTICE << "not filter segment group in delete predicate:" - << new_column_data->version(); + << new_column_data->version(); new_column_data->set_delete_status(DEL_NOT_SATISFIED); } auto merge_ctx = new AlphaMergeContext(); diff --git a/be/src/olap/rowset/alpha_rowset_reader.h b/be/src/olap/rowset/alpha_rowset_reader.h index 89e86b404d..3dffec2457 100644 --- a/be/src/olap/rowset/alpha_rowset_reader.h +++ b/be/src/olap/rowset/alpha_rowset_reader.h @@ -63,7 +63,7 @@ public: // It's ok, because we only get ref here, the block's owner is this reader. Status next_block(RowBlock** block) override; - Status next_block(vectorized::Block *block) override { + Status next_block(vectorized::Block* block) override { return Status::OLAPInternalError(OLAP_ERR_DATA_EOF); } diff --git a/be/src/olap/rowset/alpha_rowset_writer.cpp b/be/src/olap/rowset/alpha_rowset_writer.cpp index 03cded4aab..138b9a73cf 100644 --- a/be/src/olap/rowset/alpha_rowset_writer.cpp +++ b/be/src/olap/rowset/alpha_rowset_writer.cpp @@ -112,8 +112,8 @@ Status AlphaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { return Status::OK(); } -Status AlphaRowsetWriter::add_rowset_for_linked_schema_change( - RowsetSharedPtr rowset, const SchemaMapping& schema_mapping) { +Status AlphaRowsetWriter::add_rowset_for_linked_schema_change(RowsetSharedPtr rowset, + const SchemaMapping& schema_mapping) { _need_column_data_writer = false; // this api is for LinkedSchemaChange // use create hard link to copy rowset for performance @@ -263,8 +263,7 @@ Status AlphaRowsetWriter::_init() { _cur_segment_group = new (std::nothrow) SegmentGroup( _rowset_writer_context.tablet_id, _rowset_writer_context.rowset_id, _rowset_writer_context.tablet_schema, _rowset_writer_context.path_desc.filepath, - _rowset_writer_context.version, false, - _segment_group_id, 0); + _rowset_writer_context.version, false, _segment_group_id, 0); } DCHECK(_cur_segment_group != nullptr) << "failed to malloc SegmentGroup"; _cur_segment_group->acquire(); diff --git a/be/src/olap/rowset/alpha_rowset_writer.h b/be/src/olap/rowset/alpha_rowset_writer.h index 9928aad948..a4f7dfa479 100644 --- a/be/src/olap/rowset/alpha_rowset_writer.h +++ b/be/src/olap/rowset/alpha_rowset_writer.h @@ -41,7 +41,7 @@ public: // add rowset by create hard link Status add_rowset(RowsetSharedPtr rowset) override; Status add_rowset_for_linked_schema_change(RowsetSharedPtr rowset, - const SchemaMapping& schema_mapping) override; + const SchemaMapping& schema_mapping) override; Status add_rowset_for_migration(RowsetSharedPtr rowset) override; Status flush() override; diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 38a85d3a72..1627a5bf5d 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -31,8 +31,8 @@ namespace doris { -FilePathDesc BetaRowset::segment_file_path(const FilePathDesc& segment_dir_desc, const RowsetId& rowset_id, - int segment_id) { +FilePathDesc BetaRowset::segment_file_path(const FilePathDesc& segment_dir_desc, + const RowsetId& rowset_id, int segment_id) { FilePathDescStream path_desc_s; path_desc_s << segment_dir_desc << "/" << rowset_id.to_string() << "_" << segment_id << ".dat"; return path_desc_s.path_desc(); @@ -61,7 +61,7 @@ Status BetaRowset::load_segments(std::vector* segm auto s = segment_v2::Segment::open(seg_path_desc, seg_id, _schema, &segment); if (!s.ok()) { LOG(WARNING) << "failed to open segment. " << seg_path_desc.debug_string() - << " under rowset " << unique_id() << " : " << s.to_string(); + << " under rowset " << unique_id() << " : " << s.to_string(); return Status::OLAPInternalError(OLAP_ERR_ROWSET_LOAD_FAILED); } segments->push_back(std::move(segment)); @@ -76,8 +76,8 @@ Status BetaRowset::create_reader(RowsetReaderSharedPtr* result) { } Status BetaRowset::split_range(const RowCursor& start_key, const RowCursor& end_key, - uint64_t request_block_row_count, size_t key_num, - std::vector* ranges) { + uint64_t request_block_row_count, size_t key_num, + std::vector* ranges) { ranges->emplace_back(start_key.to_tuple()); ranges->emplace_back(end_key.to_tuple()); return Status::OK(); @@ -95,8 +95,8 @@ Status BetaRowset::remove() { fs::BlockManager* block_mgr = fs::fs_util::block_manager(path_desc); if (!block_mgr->delete_block(path_desc).ok()) { char errmsg[64]; - VLOG_NOTICE << "failed to delete file. err=" << strerror_r(errno, errmsg, 64) - << ", " << path_desc.debug_string(); + VLOG_NOTICE << "failed to delete file. err=" << strerror_r(errno, errmsg, 64) << ", " + << path_desc.debug_string(); success = false; } } @@ -116,7 +116,8 @@ Status BetaRowset::link_files_to(const FilePathDesc& dir_desc, RowsetId new_rows FilePathDesc dst_link_path_desc = segment_file_path(dir_desc, new_rowset_id, i); // TODO(lingbin): use Env API? or EnvUtil? if (FileUtils::check_exist(dst_link_path_desc.filepath)) { - LOG(WARNING) << "failed to create hard link, file already exist: " << dst_link_path_desc.filepath; + LOG(WARNING) << "failed to create hard link, file already exist: " + << dst_link_path_desc.filepath; return Status::OLAPInternalError(OLAP_ERR_FILE_ALREADY_EXIST); } FilePathDesc src_file_path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i); @@ -124,7 +125,8 @@ Status BetaRowset::link_files_to(const FilePathDesc& dir_desc, RowsetId new_rows // use copy? or keep refcount to avoid being delete? fs::BlockManager* block_mgr = fs::fs_util::block_manager(dir_desc); if (!block_mgr->link_file(src_file_path_desc, dst_link_path_desc).ok()) { - LOG(WARNING) << "fail to create hard link. from=" << src_file_path_desc.debug_string() << ", " + LOG(WARNING) << "fail to create hard link. from=" << src_file_path_desc.debug_string() + << ", " << "to=" << dst_link_path_desc.debug_string() << ", errno=" << Errno::no(); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } @@ -146,18 +148,18 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row } FilePathDesc src_path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i); if (!Env::Default()->copy_path(src_path_desc.filepath, dst_path_desc.filepath).ok()) { - LOG(WARNING) << "fail to copy file. from=" << src_path_desc.filepath << ", to=" - << dst_path_desc.filepath << ", errno=" << Errno::no(); + LOG(WARNING) << "fail to copy file. from=" << src_path_desc.filepath + << ", to=" << dst_path_desc.filepath << ", errno=" << Errno::no(); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } } return Status::OK(); } -Status BetaRowset::upload_files_to(const FilePathDesc& dir_desc, - const RowsetId& new_rowset_id, bool delete_src) { - std::shared_ptr storage_backend = StorageBackendMgr::instance()-> - get_storage_backend(dir_desc.storage_name); +Status BetaRowset::upload_files_to(const FilePathDesc& dir_desc, const RowsetId& new_rowset_id, + bool delete_src) { + std::shared_ptr storage_backend = + StorageBackendMgr::instance()->get_storage_backend(dir_desc.storage_name); if (storage_backend == nullptr) { LOG(WARNING) << "storage_backend is invalid: " << dir_desc.debug_string(); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); @@ -175,12 +177,13 @@ Status BetaRowset::upload_files_to(const FilePathDesc& dir_desc, FilePathDesc src_path_desc = segment_file_path(_rowset_path_desc, rowset_id(), i); if (!storage_backend->upload(src_path_desc.filepath, dst_path_desc.remote_path).ok()) { - LOG(WARNING) << "fail to upload file. from=" << src_path_desc.filepath << ", to=" - << dst_path_desc.remote_path << ", errno=" << Errno::no(); + LOG(WARNING) << "fail to upload file. from=" << src_path_desc.filepath + << ", to=" << dst_path_desc.remote_path << ", errno=" << Errno::no(); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } if (delete_src && !Env::Default()->delete_file(src_path_desc.filepath).ok()) { - LOG(WARNING) << "fail to delete local file: " << src_path_desc.filepath << ", errno=" << Errno::no(); + LOG(WARNING) << "fail to delete local file: " << src_path_desc.filepath + << ", errno=" << Errno::no(); return Status::OLAPInternalError(OLAP_ERR_OS_ERROR); } LOG(INFO) << "succeed to upload file. from " << src_path_desc.filepath << " to " @@ -200,8 +203,8 @@ bool BetaRowset::check_path(const std::string& path) { bool BetaRowset::check_file_exist() { if (_rowset_path_desc.is_remote()) { - std::shared_ptr storage_backend = StorageBackendMgr::instance()-> - get_storage_backend(_rowset_path_desc.storage_name); + std::shared_ptr storage_backend = + StorageBackendMgr::instance()->get_storage_backend(_rowset_path_desc.storage_name); if (storage_backend == nullptr) { LOG(WARNING) << "storage_backend is invalid: " << _rowset_path_desc.debug_string(); return false; diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index ffb6467c57..0478f832e1 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -39,12 +39,12 @@ public: Status create_reader(RowsetReaderSharedPtr* result) override; - static FilePathDesc segment_file_path(const FilePathDesc& segment_dir_desc, const RowsetId& rowset_id, - int segment_id); + static FilePathDesc segment_file_path(const FilePathDesc& segment_dir_desc, + const RowsetId& rowset_id, int segment_id); Status split_range(const RowCursor& start_key, const RowCursor& end_key, - uint64_t request_block_row_count, size_t key_num, - std::vector* ranges) override; + uint64_t request_block_row_count, size_t key_num, + std::vector* ranges) override; Status remove() override; @@ -52,8 +52,8 @@ public: Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) override; - Status upload_files_to(const FilePathDesc& dir_desc, - const RowsetId& new_rowset_id, bool delete_src = false) override; + Status upload_files_to(const FilePathDesc& dir_desc, const RowsetId& new_rowset_id, + bool delete_src = false) override; // only applicable to alpha rowset, no op here Status remove_old_files(std::vector* files_to_remove) override { diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index b4cd2a7bc6..ff16611528 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -32,9 +32,7 @@ namespace doris { BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset) - : _context(nullptr), - _rowset(std::move(rowset)), - _stats(&_owned_stats) { + : _context(nullptr), _rowset(std::move(rowset)), _stats(&_owned_stats) { _rowset->acquire(); } @@ -48,7 +46,8 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { _stats = _context->stats; } // SegmentIterator will load seek columns on demand - _schema = std::make_unique(_context->tablet_schema->columns(), *(_context->return_columns)); + _schema = std::make_unique(_context->tablet_schema->columns(), + *(_context->return_columns)); // convert RowsetReaderContext to StorageReadOptions StorageReadOptions read_options; @@ -89,7 +88,8 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { // load segments RETURN_NOT_OK(SegmentLoader::instance()->load_segments( - _rowset, &_segment_cache_handle, read_context->reader_type == ReaderType::READER_QUERY)); + _rowset, &_segment_cache_handle, + read_context->reader_type == ReaderType::READER_QUERY)); // create iterator for each segment std::vector> seg_iterators; @@ -112,13 +112,16 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { // merge or union segment iterator RowwiseIterator* final_iterator; if (config::enable_storage_vectorization && read_context->is_vec) { - if (read_context->need_ordered_result && _rowset->rowset_meta()->is_segments_overlapping()) { - final_iterator = vectorized::new_merge_iterator(iterators, read_context->sequence_id_idx); + if (read_context->need_ordered_result && + _rowset->rowset_meta()->is_segments_overlapping()) { + final_iterator = + vectorized::new_merge_iterator(iterators, read_context->sequence_id_idx); } else { final_iterator = vectorized::new_union_iterator(iterators); } } else { - if (read_context->need_ordered_result && _rowset->rowset_meta()->is_segments_overlapping()) { + if (read_context->need_ordered_result && + _rowset->rowset_meta()->is_segments_overlapping()) { final_iterator = new_merge_iterator(iterators, read_context->sequence_id_idx); } else { final_iterator = new_union_iterator(iterators); @@ -133,8 +136,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { _iterator.reset(final_iterator); // init input block - _input_block.reset(new RowBlockV2(*_schema, - std::min(1024, read_context->batch_size))); + _input_block.reset(new RowBlockV2(*_schema, std::min(1024, read_context->batch_size))); if (!read_context->is_vec) { // init input/output block and row @@ -227,7 +229,8 @@ Status BetaRowsetReader::next_block(vectorized::Block* block) { } } is_first = false; - } while (block->rows() < _context->batch_size); // here we should keep block.rows() < batch_size + } while (block->rows() < + _context->batch_size); // here we should keep block.rows() < batch_size } return Status::OK(); diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 16ee1be6dc..88eef92b2f 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -52,18 +52,20 @@ BetaRowsetWriter::~BetaRowsetWriter() { _segment_writer.reset(); // ensure all files are closed Status st; if (_context.path_desc.is_remote()) { - std::shared_ptr storage_backend = StorageBackendMgr::instance()-> - get_storage_backend(_context.path_desc.storage_name); + std::shared_ptr storage_backend = + StorageBackendMgr::instance()->get_storage_backend( + _context.path_desc.storage_name); if (storage_backend == nullptr) { LOG(WARNING) << "storage_backend is invalid: " << _context.path_desc.debug_string(); return; } WARN_IF_ERROR(storage_backend->rmdir(_context.path_desc.remote_path), - strings::Substitute("Failed to delete remote file=$0", _context.path_desc.remote_path)); + strings::Substitute("Failed to delete remote file=$0", + _context.path_desc.remote_path)); } for (int i = 0; i < _num_segment; ++i) { - auto path_desc = BetaRowset::segment_file_path(_context.path_desc, - _context.rowset_id, i); + auto path_desc = + BetaRowset::segment_file_path(_context.path_desc, _context.rowset_id, i); // Even if an error is encountered, these files that have not been cleaned up // will be cleaned up by the GC background. So here we only print the error // message when we encounter an error. @@ -166,8 +168,8 @@ Status BetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { return Status::OK(); } -Status BetaRowsetWriter::add_rowset_for_linked_schema_change( - RowsetSharedPtr rowset, const SchemaMapping& schema_mapping) { +Status BetaRowsetWriter::add_rowset_for_linked_schema_change(RowsetSharedPtr rowset, + const SchemaMapping& schema_mapping) { // TODO use schema_mapping to transfer zonemap return add_rowset(rowset); } @@ -185,13 +187,15 @@ Status BetaRowsetWriter::add_rowset_for_migration(RowsetSharedPtr rowset) { } else if (!rowset->rowset_path_desc().is_remote() && _context.path_desc.is_remote()) { res = rowset->upload_files_to(_context.path_desc, _context.rowset_id); if (!res.ok()) { - LOG(WARNING) << "upload_files failed. src: " << rowset->rowset_path_desc().debug_string() + LOG(WARNING) << "upload_files failed. src: " + << rowset->rowset_path_desc().debug_string() << ", dest: " << _context.path_desc.debug_string(); return res; } } else { LOG(WARNING) << "add_rowset_for_migration failed. storage_medium is invalid. src: " - << rowset->rowset_path_desc().debug_string() << ", dest: " << _context.path_desc.debug_string(); + << rowset->rowset_path_desc().debug_string() + << ", dest: " << _context.path_desc.debug_string(); return Status::OLAPInternalError(OLAP_ERR_ROWSET_ADD_MIGRATION_V2); } @@ -232,7 +236,7 @@ Status BetaRowsetWriter::flush_single_memtable(MemTable* memtable, int64_t* flus } if (PREDICT_FALSE(writer->estimate_segment_size() >= MAX_SEGMENT_SIZE || - writer->num_rows_written() >= _context.max_rows_per_segment)) { + writer->num_rows_written() >= _context.max_rows_per_segment)) { RETURN_NOT_OK(_flush_segment_writer(&writer)); } ++_num_rows_written; @@ -282,9 +286,10 @@ RowsetSharedPtr BetaRowsetWriter::build() { return rowset; } -Status BetaRowsetWriter::_create_segment_writer(std::unique_ptr* writer) { - auto path_desc = BetaRowset::segment_file_path(_context.path_desc, _context.rowset_id, - _num_segment++); +Status BetaRowsetWriter::_create_segment_writer( + std::unique_ptr* writer) { + auto path_desc = + BetaRowset::segment_file_path(_context.path_desc, _context.rowset_id, _num_segment++); // TODO(lingbin): should use a more general way to get BlockManager object // and tablets with the same type should share one BlockManager object; fs::BlockManager* block_mgr = fs::fs_util::block_manager(_context.path_desc); @@ -293,7 +298,7 @@ Status BetaRowsetWriter::_create_segment_writer(std::unique_ptrcreate_block(opts, &wblock); if (!st.ok()) { - LOG(WARNING) << "failed to create writable block. path=" << path_desc.filepath + LOG(WARNING) << "failed to create writable block. path=" << path_desc.filepath << ", err: " << st.get_error_msg(); return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED); } @@ -301,7 +306,8 @@ Status BetaRowsetWriter::_create_segment_writer(std::unique_ptrreset(new segment_v2::SegmentWriter(wblock.get(), _num_segment, _context.tablet_schema, - _context.data_dir, _context.max_rows_per_segment, writer_options)); + _context.data_dir, _context.max_rows_per_segment, + writer_options)); { std::lock_guard l(_lock); _wblocks.push_back(std::move(wblock)); diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index 0bce1af5cc..53c3496e05 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -44,12 +44,12 @@ public: Status add_row(const ContiguousRow& row) override { return _add_row(row); } Status add_block(const vectorized::Block* block) override; - + // add rowset by create hard link Status add_rowset(RowsetSharedPtr rowset) override; Status add_rowset_for_linked_schema_change(RowsetSharedPtr rowset, - const SchemaMapping& schema_mapping) override; + const SchemaMapping& schema_mapping) override; Status add_rowset_for_migration(RowsetSharedPtr rowset) override; diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp index bf924296da..fa0bebfbe0 100644 --- a/be/src/olap/rowset/column_data.cpp +++ b/be/src/olap/rowset/column_data.cpp @@ -132,10 +132,9 @@ Status ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool withou SAFE_DELETE(_segment_reader); std::string file_name; file_name = segment_group()->construct_data_file_path(block_pos.segment); - _segment_reader = new (std::nothrow) - SegmentReader(file_name, segment_group(), block_pos.segment, _seek_columns, - _load_bf_columns, _conditions, _delete_handler, _delete_status, - _lru_cache, _runtime_state, _stats); + _segment_reader = new (std::nothrow) SegmentReader( + file_name, segment_group(), block_pos.segment, _seek_columns, _load_bf_columns, + _conditions, _delete_handler, _delete_status, _lru_cache, _runtime_state, _stats); if (_segment_reader == nullptr) { OLAP_LOG_WARNING("fail to malloc segment reader."); return Status::OLAPInternalError(OLAP_ERR_MALLOC_ERROR); @@ -162,7 +161,7 @@ Status ColumnData::_seek_to_block(const RowBlockPosition& block_pos, bool withou } Status ColumnData::_find_position_by_short_key(const RowCursor& key, bool find_last_key, - RowBlockPosition* position) { + RowBlockPosition* position) { RowBlockPosition tmp_pos; auto res = _segment_group->find_short_key(key, &_short_key_cursor, find_last_key, &tmp_pos); if (!res.ok()) { @@ -182,7 +181,7 @@ Status ColumnData::_find_position_by_short_key(const RowCursor& key, bool find_l } Status ColumnData::_find_position_by_full_key(const RowCursor& key, bool find_last_key, - RowBlockPosition* position) { + RowBlockPosition* position) { RowBlockPosition tmp_pos; auto res = _segment_group->find_short_key(key, &_short_key_cursor, false, &tmp_pos); if (!res.ok()) { @@ -253,8 +252,8 @@ Status ColumnData::_find_position_by_full_key(const RowCursor& key, bool find_la } if (!(res = segment_group()->advance_row_block(*it_result, &start_position))) { - LOG(WARNING) << "fail to advance row_block. res=" << res - << " it_offset=" << *it_result << " start_pos=" << start_position.to_string(); + LOG(WARNING) << "fail to advance row_block. res=" << res << " it_offset=" << *it_result + << " start_pos=" << start_position.to_string(); return res; } @@ -292,11 +291,10 @@ Status ColumnData::_seek_to_row(const RowCursor& key, bool find_last_key, bool i bool without_filter = is_end_key; res = _seek_to_block(position, without_filter); if (!res.ok()) { - LOG(WARNING) << "fail to get row block. res=" << res - << " segment=" << position.segment - << " block_size=" << position.block_size - << " data_offset=" << position.data_offset - << " index_offset=" << position.index_offset; + LOG(WARNING) << "fail to get row block. res=" << res << " segment=" << position.segment + << " block_size=" << position.block_size + << " data_offset=" << position.data_offset + << " index_offset=" << position.index_offset; return res; } res = _get_block(without_filter); @@ -346,8 +344,8 @@ const RowCursor* ColumnData::seek_and_get_current_row(const RowBlockPosition& po } Status ColumnData::prepare_block_read(const RowCursor* start_key, bool find_start_key, - const RowCursor* end_key, bool find_end_key, - RowBlock** first_block) { + const RowCursor* end_key, bool find_end_key, + RowBlock** first_block) { SCOPED_RAW_TIMER(&_stats->block_fetch_ns); set_eof(false); _end_key_is_set = false; @@ -583,7 +581,7 @@ Status ColumnData::schema_change_init() { } Status ColumnData::_get_block_from_reader(VectorizedRowBatch** got_batch, bool without_filter, - int rows_read) { + int rows_read) { VectorizedRowBatch* vec_batch = nullptr; if (_is_normal_read) { vec_batch = _read_vector_batch.get(); diff --git a/be/src/olap/rowset/column_data.h b/be/src/olap/rowset/column_data.h index 7502c77ac4..c2c91fab79 100644 --- a/be/src/olap/rowset/column_data.h +++ b/be/src/olap/rowset/column_data.h @@ -51,16 +51,15 @@ public: Status init(); Status prepare_block_read(const RowCursor* start_key, bool find_start_key, - const RowCursor* end_key, bool find_end_key, - RowBlock** first_block); + const RowCursor* end_key, bool find_end_key, RowBlock** first_block); Status get_next_block(RowBlock** row_block); void set_read_params(const std::vector& return_columns, const std::vector& seek_columns, const std::set& load_bf_columns, const Conditions& conditions, - std::shared_ptr> col_predicates, bool is_using_cache, - RuntimeState* runtime_state); + std::shared_ptr> col_predicates, + bool is_using_cache, RuntimeState* runtime_state); Status get_first_row_block(RowBlock** row_block); @@ -114,9 +113,9 @@ private: Status _seek_to_block(const RowBlockPosition& block_pos, bool without_filter); Status _find_position_by_short_key(const RowCursor& key, bool find_last_key, - RowBlockPosition* position); + RowBlockPosition* position); Status _find_position_by_full_key(const RowCursor& key, bool find_last_key, - RowBlockPosition* position); + RowBlockPosition* position); // Used in _seek_to_row, this function will goto next row that valid for this // ColumnData @@ -125,7 +124,7 @@ private: // get block from reader, just read vector batch from _current_segment. // The read batch return by got_batch. Status _get_block_from_reader(VectorizedRowBatch** got_batch, bool without_filter, - int rows_read); + int rows_read); // get block from segment reader. If this function returns OLAP_SUCCESS Status _get_block(bool without_filter, int rows_read = 0); diff --git a/be/src/olap/rowset/column_reader.cpp b/be/src/olap/rowset/column_reader.cpp index 327d615287..b4d328a6d0 100644 --- a/be/src/olap/rowset/column_reader.cpp +++ b/be/src/olap/rowset/column_reader.cpp @@ -31,8 +31,7 @@ IntegerColumnReader::~IntegerColumnReader() { SAFE_DELETE(_data_reader); } -Status IntegerColumnReader::init(std::map* streams, - bool is_sign) { +Status IntegerColumnReader::init(std::map* streams, bool is_sign) { if (nullptr == streams) { OLAP_LOG_WARNING("input streams is nullptr"); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -81,8 +80,8 @@ StringColumnDirectReader::~StringColumnDirectReader() { SAFE_DELETE(_length_reader); } -Status StringColumnDirectReader::init(std::map* streams, - int size, MemPool* mem_pool) { +Status StringColumnDirectReader::init(std::map* streams, int size, + MemPool* mem_pool) { if (nullptr == streams) { OLAP_LOG_WARNING("input streams is nullptr"); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -167,7 +166,7 @@ Status StringColumnDirectReader::next(char* buffer, uint32_t* length) { } Status StringColumnDirectReader::next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool, int64_t* read_bytes) { + MemPool* mem_pool, int64_t* read_bytes) { /* * MemPool here is not the same as MemPool in init function * 1. MemPool is created by VectorizedRowBatch, @@ -284,7 +283,7 @@ StringColumnDictionaryReader::~StringColumnDictionaryReader() { } Status StringColumnDictionaryReader::init(std::map* streams, - int size, MemPool* mem_pool) { + int size, MemPool* mem_pool) { ReadOnlyFileStream* dictionary_data_stream = extract_stream(_column_unique_id, StreamInfoMessage::DICTIONARY_DATA, streams); @@ -404,7 +403,7 @@ Status StringColumnDictionaryReader::next(char* buffer, uint32_t* length) { } Status StringColumnDictionaryReader::next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool, int64_t* read_bytes) { + MemPool* mem_pool, int64_t* read_bytes) { int64_t index[size]; int64_t buffer_size = 0; Status res = Status::OK(); @@ -661,7 +660,7 @@ ColumnReader::~ColumnReader() { } Status ColumnReader::init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { if (nullptr == streams) { OLAP_LOG_WARNING("null parameters given."); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -710,8 +709,7 @@ Status ColumnReader::skip(uint64_t row_count) { return Status::OK(); } -Status ColumnReader::next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool) { +Status ColumnReader::next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool) { Status res = Status::OK(); column_vector->set_is_null(_is_null); if (nullptr != _present_reader) { @@ -764,7 +762,7 @@ TinyColumnReader::~TinyColumnReader() { } Status TinyColumnReader::init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { if (nullptr == streams) { OLAP_LOG_WARNING("input streams is nullptr"); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -818,7 +816,7 @@ Status TinyColumnReader::skip(uint64_t row_count) { } Status TinyColumnReader::next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool) { + MemPool* mem_pool) { Status res = ColumnReader::next_vector(column_vector, size, mem_pool); if (!res.ok()) { if (Status::OLAPInternalError(OLAP_ERR_DATA_EOF) == res) { @@ -868,7 +866,7 @@ DecimalColumnReader::~DecimalColumnReader() { } Status DecimalColumnReader::init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { if (nullptr == streams) { OLAP_LOG_WARNING("input streams is nullptr"); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -967,7 +965,7 @@ Status DecimalColumnReader::skip(uint64_t row_count) { } Status DecimalColumnReader::next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool) { + MemPool* mem_pool) { Status res = ColumnReader::next_vector(column_vector, size, mem_pool); if (!res.ok()) { if (Status::OLAPInternalError(OLAP_ERR_DATA_EOF) == res) { @@ -1034,7 +1032,7 @@ LargeIntColumnReader::~LargeIntColumnReader() { } Status LargeIntColumnReader::init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { if (nullptr == streams) { OLAP_LOG_WARNING("input streams is nullptr"); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -1129,7 +1127,7 @@ Status LargeIntColumnReader::skip(uint64_t row_count) { } Status LargeIntColumnReader::next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool) { + MemPool* mem_pool) { Status res = ColumnReader::next_vector(column_vector, size, mem_pool); if (!res.ok()) { if (Status::OLAPInternalError(OLAP_ERR_DATA_EOF) == res) { diff --git a/be/src/olap/rowset/column_reader.h b/be/src/olap/rowset/column_reader.h index 9ea973cca9..4c9593fd5e 100644 --- a/be/src/olap/rowset/column_reader.h +++ b/be/src/olap/rowset/column_reader.h @@ -100,8 +100,7 @@ public: StringColumnDirectReader(uint32_t column_unique_id, uint32_t dictionary_size); ~StringColumnDirectReader(); - Status init(std::map* streams, int size, - MemPool* mem_pool); + Status init(std::map* streams, int size, MemPool* mem_pool); Status seek(PositionProvider* positions); Status skip(uint64_t row_count); // Return the data of the current row and move the internal pointer backward @@ -109,7 +108,7 @@ public: // length - the size of the buffer area when input, and the size of the string when returning Status next(char* buffer, uint32_t* length); Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool, - int64_t* read_bytes); + int64_t* read_bytes); size_t get_buffer_size() { return sizeof(RunLengthByteReader); } @@ -132,13 +131,12 @@ class StringColumnDictionaryReader { public: StringColumnDictionaryReader(uint32_t column_unique_id, uint32_t dictionary_size); ~StringColumnDictionaryReader(); - Status init(std::map* streams, int size, - MemPool* mem_pool); + Status init(std::map* streams, int size, MemPool* mem_pool); Status seek(PositionProvider* positions); Status skip(uint64_t row_count); Status next(char* buffer, uint32_t* length); Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool, - int64_t* read_bytes); + int64_t* read_bytes); size_t get_buffer_size() { return sizeof(RunLengthByteReader) + _dictionary_size; } @@ -189,7 +187,7 @@ public: // Input: // streams-input stream virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats); + MemPool* mem_pool, OlapReaderStatistics* stats); // Set the position of the next returned data // positions are the positions where each column needs to seek, ColumnReader passes (*positions)[_column_unique_id] @@ -234,7 +232,7 @@ public: virtual ~DefaultValueReader() {} virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { switch (_type) { case OLAP_FIELD_TYPE_TINYINT: { _values = reinterpret_cast(mem_pool->allocate(size * sizeof(int8_t))); @@ -388,7 +386,7 @@ public: NullValueReader(uint32_t column_id, uint32_t column_unique_id) : ColumnReader(column_id, column_unique_id) {} Status init(std::map* streams, int size, MemPool* mem_pool, - OlapReaderStatistics* stats) override { + OlapReaderStatistics* stats) override { _is_null = reinterpret_cast(mem_pool->allocate(size)); memset(_is_null, 1, size); _stats = stats; @@ -397,7 +395,7 @@ public: virtual Status seek(PositionProvider* positions) override { return Status::OK(); } virtual Status skip(uint64_t row_count) override { return Status::OK(); } virtual Status next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool) override { + MemPool* mem_pool) override { column_vector->set_no_nulls(false); column_vector->set_is_null(_is_null); _stats->bytes_read += size; @@ -412,7 +410,7 @@ public: virtual ~TinyColumnReader(); virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats); + MemPool* mem_pool, OlapReaderStatistics* stats); virtual Status seek(PositionProvider* positions); virtual Status skip(uint64_t row_count); virtual Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool); @@ -438,7 +436,7 @@ public: virtual ~IntegerColumnReaderWrapper() {} virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { Status res = ColumnReader::init(streams, size, mem_pool, stats); if (res.ok()) { @@ -471,9 +469,7 @@ public: return Status::OK(); } - virtual Status skip(uint64_t row_count) { - return _reader.skip(_count_none_nulls(row_count)); - } + virtual Status skip(uint64_t row_count) { return _reader.skip(_count_none_nulls(row_count)); } virtual Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool) { Status res = ColumnReader::next_vector(column_vector, size, mem_pool); @@ -538,7 +534,7 @@ public: virtual ~FixLengthStringColumnReader() {} virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { Status res = ColumnReader::init(streams, size, mem_pool, stats); if (res.ok()) { @@ -570,9 +566,7 @@ public: return Status::OK(); } - virtual Status skip(uint64_t row_count) { - return _reader.skip(_count_none_nulls(row_count)); - } + virtual Status skip(uint64_t row_count) { return _reader.skip(_count_none_nulls(row_count)); } virtual Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool) { Status res = ColumnReader::next_vector(column_vector, size, mem_pool); if (!res.ok()) { @@ -605,7 +599,7 @@ public: _max_length(max_length) {} virtual ~VarStringColumnReader() {} virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { Status res = ColumnReader::init(streams, size, mem_pool, stats); if (res.ok()) { res = _reader.init(streams, size, mem_pool); @@ -636,9 +630,7 @@ public: return Status::OK(); } - virtual Status skip(uint64_t row_count) { - return _reader.skip(_count_none_nulls(row_count)); - } + virtual Status skip(uint64_t row_count) { return _reader.skip(_count_none_nulls(row_count)); } virtual Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool) { Status res = ColumnReader::next_vector(column_vector, size, mem_pool); @@ -672,7 +664,7 @@ public: virtual ~FloatintPointColumnReader() {} virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats) { + MemPool* mem_pool, OlapReaderStatistics* stats) { if (nullptr == streams) { OLAP_LOG_WARNING("input streams is nullptr"); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -790,11 +782,11 @@ public: DecimalColumnReader(uint32_t column_id, uint32_t column_unique_id); virtual ~DecimalColumnReader(); Status init(std::map* streams, int size, MemPool* mem_pool, - OlapReaderStatistics* stats) override; + OlapReaderStatistics* stats) override; virtual Status seek(PositionProvider* positions) override; virtual Status skip(uint64_t row_count) override; virtual Status next_vector(ColumnVector* column_vector, uint32_t size, - MemPool* mem_pool) override; + MemPool* mem_pool) override; virtual size_t get_buffer_size() override { return sizeof(RunLengthByteReader) * 2; } @@ -810,7 +802,7 @@ public: LargeIntColumnReader(uint32_t column_id, uint32_t column_unique_id); virtual ~LargeIntColumnReader(); virtual Status init(std::map* streams, int size, - MemPool* mem_pool, OlapReaderStatistics* stats); + MemPool* mem_pool, OlapReaderStatistics* stats); virtual Status seek(PositionProvider* positions); virtual Status skip(uint64_t row_count); virtual Status next_vector(ColumnVector* column_vector, uint32_t size, MemPool* mem_pool); diff --git a/be/src/olap/rowset/column_writer.cpp b/be/src/olap/rowset/column_writer.cpp index 6128ab4f6f..a78bda0f09 100644 --- a/be/src/olap/rowset/column_writer.cpp +++ b/be/src/olap/rowset/column_writer.cpp @@ -671,8 +671,8 @@ Status VarStringColumnWriter::finalize(ColumnDataHeaderMessage* header) { } // id_writer is practical to data_stream, it doesn't matter if you repeat flush - if (!_length_writer->flush() || !_id_writer->flush() || - !_dict_stream->flush() || !_data_stream->flush()) { + if (!_length_writer->flush() || !_id_writer->flush() || !_dict_stream->flush() || + !_data_stream->flush()) { OLAP_LOG_WARNING("fail to flush stream."); return Status::OLAPInternalError(OLAP_ERR_WRITER_DATA_WRITE_ERROR); } diff --git a/be/src/olap/rowset/rowset.cpp b/be/src/olap/rowset/rowset.cpp index c07586349d..7003fd5929 100644 --- a/be/src/olap/rowset/rowset.cpp +++ b/be/src/olap/rowset/rowset.cpp @@ -21,7 +21,8 @@ namespace doris { -Rowset::Rowset(const TabletSchema* schema, const FilePathDesc& rowset_path_desc, RowsetMetaSharedPtr rowset_meta) +Rowset::Rowset(const TabletSchema* schema, const FilePathDesc& rowset_path_desc, + RowsetMetaSharedPtr rowset_meta) : _schema(schema), _rowset_path_desc(rowset_path_desc), _rowset_meta(std::move(rowset_meta)), @@ -53,9 +54,10 @@ Status Rowset::load(bool use_cache) { } } // load is done - VLOG_CRITICAL << "rowset is loaded. " << rowset_id() << ", rowset version:" << rowset_meta()->version() - << ", state from ROWSET_UNLOADED to ROWSET_LOADED. tabletid:" - << _rowset_meta->tablet_id(); + VLOG_CRITICAL << "rowset is loaded. " << rowset_id() + << ", rowset version:" << rowset_meta()->version() + << ", state from ROWSET_UNLOADED to ROWSET_LOADED. tabletid:" + << _rowset_meta->tablet_id(); return Status::OK(); } diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 2e91f98588..8ca1ee5551 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -127,8 +127,8 @@ public: // The first/last tuple must be start_key/end_key.to_tuple(). If we can't divide the input range, // the result `ranges` should be [start_key.to_tuple(), end_key.to_tuple()] virtual Status split_range(const RowCursor& start_key, const RowCursor& end_key, - uint64_t request_block_row_count, size_t key_num, - std::vector* ranges) = 0; + uint64_t request_block_row_count, size_t key_num, + std::vector* ranges) = 0; const RowsetMetaSharedPtr& rowset_meta() const { return _rowset_meta; } @@ -198,8 +198,10 @@ public: // copy all files to `dir` virtual Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) = 0; - virtual Status upload_files_to(const FilePathDesc& dir_desc, - const RowsetId&, bool delete_src = false) { return Status::OK(); } + virtual Status upload_files_to(const FilePathDesc& dir_desc, const RowsetId&, + bool delete_src = false) { + return Status::OK(); + } virtual Status remove_old_files(std::vector* files_to_remove) = 0; @@ -209,7 +211,9 @@ public: virtual bool check_file_exist() = 0; // return an unique identifier string for this rowset - std::string unique_id() const { return _rowset_path_desc.filepath + "/" + rowset_id().to_string(); } + std::string unique_id() const { + return _rowset_path_desc.filepath + "/" + rowset_id().to_string(); + } bool need_delete_file() const { return _need_delete_file; } @@ -217,9 +221,7 @@ public: bool contains_version(Version version) { return rowset_meta()->version().contains(version); } - FilePathDesc rowset_path_desc() { - return _rowset_path_desc; - } + FilePathDesc rowset_path_desc() { return _rowset_path_desc; } static bool comparator(const RowsetSharedPtr& left, const RowsetSharedPtr& right) { return left->end_version() < right->end_version(); @@ -256,7 +258,8 @@ protected: DISALLOW_COPY_AND_ASSIGN(Rowset); // this is non-public because all clients should use RowsetFactory to obtain pointer to initialized Rowset - Rowset(const TabletSchema* schema, const FilePathDesc& rowset_path_desc, RowsetMetaSharedPtr rowset_meta); + Rowset(const TabletSchema* schema, const FilePathDesc& rowset_path_desc, + RowsetMetaSharedPtr rowset_meta); // this is non-public because all clients should use RowsetFactory to obtain pointer to initialized Rowset virtual Status init() = 0; diff --git a/be/src/olap/rowset/rowset_factory.cpp b/be/src/olap/rowset/rowset_factory.cpp index 077ac7ca6c..361290fe51 100644 --- a/be/src/olap/rowset/rowset_factory.cpp +++ b/be/src/olap/rowset/rowset_factory.cpp @@ -28,8 +28,9 @@ namespace doris { -Status RowsetFactory::create_rowset(const TabletSchema* schema, const FilePathDesc& rowset_path_desc, - RowsetMetaSharedPtr rowset_meta, RowsetSharedPtr* rowset) { +Status RowsetFactory::create_rowset(const TabletSchema* schema, + const FilePathDesc& rowset_path_desc, + RowsetMetaSharedPtr rowset_meta, RowsetSharedPtr* rowset) { if (rowset_meta->rowset_type() == ALPHA_ROWSET) { rowset->reset(new AlphaRowset(schema, rowset_path_desc, rowset_meta)); return (*rowset)->init(); @@ -42,7 +43,7 @@ Status RowsetFactory::create_rowset(const TabletSchema* schema, const FilePathDe } Status RowsetFactory::create_rowset_writer(const RowsetWriterContext& context, - std::unique_ptr* output) { + std::unique_ptr* output) { if (context.rowset_type == ALPHA_ROWSET) { output->reset(new AlphaRowsetWriter); return (*output)->init(context); diff --git a/be/src/olap/rowset/rowset_factory.h b/be/src/olap/rowset/rowset_factory.h index 42e8f4c8e8..48d82eeb2f 100644 --- a/be/src/olap/rowset/rowset_factory.h +++ b/be/src/olap/rowset/rowset_factory.h @@ -31,15 +31,14 @@ class RowsetFactory { public: // return OLAP_SUCCESS and set inited rowset in `*rowset`. // return others if failed to create or init rowset. - static Status create_rowset(const TabletSchema* schema, - const FilePathDesc& rowset_path_desc, - RowsetMetaSharedPtr rowset_meta, RowsetSharedPtr* rowset); + static Status create_rowset(const TabletSchema* schema, const FilePathDesc& rowset_path_desc, + RowsetMetaSharedPtr rowset_meta, RowsetSharedPtr* rowset); // create and init rowset writer. // return OLAP_SUCCESS and set `*output` to inited rowset writer. // return others if failed static Status create_rowset_writer(const RowsetWriterContext& context, - std::unique_ptr* output); + std::unique_ptr* output); }; } // namespace doris diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp b/be/src/olap/rowset/rowset_meta_manager.cpp index 25a90986b1..46e9c6cde5 100644 --- a/be/src/olap/rowset/rowset_meta_manager.cpp +++ b/be/src/olap/rowset/rowset_meta_manager.cpp @@ -42,8 +42,8 @@ bool RowsetMetaManager::check_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, } Status RowsetMetaManager::get_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, - const RowsetId& rowset_id, - RowsetMetaSharedPtr rowset_meta) { + const RowsetId& rowset_id, + RowsetMetaSharedPtr rowset_meta) { std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + rowset_id.to_string(); std::string value; Status s = meta->get(META_COLUMN_FAMILY_INDEX, key, &value); @@ -65,8 +65,8 @@ Status RowsetMetaManager::get_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, } Status RowsetMetaManager::get_json_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, - const RowsetId& rowset_id, - std::string* json_rowset_meta) { + const RowsetId& rowset_id, + std::string* json_rowset_meta) { RowsetMetaSharedPtr rowset_meta_ptr(new (std::nothrow) RowsetMeta()); Status status = get_rowset_meta(meta, tablet_uid, rowset_id, rowset_meta_ptr); if (!status.ok()) { @@ -81,7 +81,7 @@ Status RowsetMetaManager::get_json_rowset_meta(OlapMeta* meta, TabletUid tablet_ } Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, - const RowsetMetaPB& rowset_meta_pb) { + const RowsetMetaPB& rowset_meta_pb) { std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + rowset_id.to_string(); std::string value; bool ret = rowset_meta_pb.SerializeToString(&value); @@ -94,8 +94,7 @@ Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const Rowse return status; } -Status RowsetMetaManager::remove(OlapMeta* meta, TabletUid tablet_uid, - const RowsetId& rowset_id) { +Status RowsetMetaManager::remove(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id) { std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + rowset_id.to_string(); VLOG_NOTICE << "start to remove rowset, key:" << key; Status status = meta->remove(META_COLUMN_FAMILY_INDEX, key); @@ -128,7 +127,7 @@ Status RowsetMetaManager::traverse_rowset_metas( } Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta, - const std::string& rowset_meta_path) { + const std::string& rowset_meta_path) { std::ifstream infile(rowset_meta_path); char buffer[1024]; std::string json_rowset_meta; diff --git a/be/src/olap/rowset/rowset_meta_manager.h b/be/src/olap/rowset/rowset_meta_manager.h index 10d3345547..8c8f3144e0 100644 --- a/be/src/olap/rowset/rowset_meta_manager.h +++ b/be/src/olap/rowset/rowset_meta_manager.h @@ -32,15 +32,14 @@ class RowsetMetaManager { public: static bool check_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id); - static Status get_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, - const RowsetId& rowset_id, RowsetMetaSharedPtr rowset_meta); + static Status get_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, + RowsetMetaSharedPtr rowset_meta); static Status get_json_rowset_meta(OlapMeta* meta, TabletUid tablet_uid, - const RowsetId& rowset_id, - std::string* json_rowset_meta); + const RowsetId& rowset_id, std::string* json_rowset_meta); static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, - const RowsetMetaPB& rowset_meta_pb); + const RowsetMetaPB& rowset_meta_pb); static Status remove(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id); diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h index d23a477714..b349743a44 100644 --- a/be/src/olap/rowset/rowset_writer.h +++ b/be/src/olap/rowset/rowset_writer.h @@ -53,7 +53,7 @@ public: // Precondition: the input `rowset` should have the same type of the rowset we're building virtual Status add_rowset_for_linked_schema_change(RowsetSharedPtr rowset, - const SchemaMapping& schema_mapping) = 0; + const SchemaMapping& schema_mapping) = 0; virtual Status add_rowset_for_migration(RowsetSharedPtr rowset) = 0; @@ -64,7 +64,7 @@ public: virtual Status flush_single_memtable(MemTable* memtable, int64_t* flush_size) { return Status::OLAPInternalError(OLAP_ERR_FUNC_NOT_IMPLEMENTED); } - + // finish building and return pointer to the built rowset (guaranteed to be inited). // return nullptr when failed virtual RowsetSharedPtr build() = 0; diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp index e83eea429c..20efe59e2f 100644 --- a/be/src/olap/rowset/segment_group.cpp +++ b/be/src/olap/rowset/segment_group.cpp @@ -46,30 +46,29 @@ namespace doris { if (!_index_loaded) { \ OLAP_LOG_WARNING("fail to find, index is not loaded. [segment_group_id=%d]", \ _segment_group_id); \ - return Status::OLAPInternalError(OLAP_ERR_NOT_INITED); \ + return Status::OLAPInternalError(OLAP_ERR_NOT_INITED); \ } \ } while (0); -#define POS_PARAM_VALIDATE(pos) \ - do { \ - if (nullptr == pos) { \ - OLAP_LOG_WARNING("fail to find, nullptr position parameter."); \ - return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); \ - } \ +#define POS_PARAM_VALIDATE(pos) \ + do { \ + if (nullptr == pos) { \ + OLAP_LOG_WARNING("fail to find, nullptr position parameter."); \ + return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); \ + } \ } while (0); -#define SLICE_PARAM_VALIDATE(slice) \ - do { \ - if (nullptr == slice) { \ - OLAP_LOG_WARNING("fail to find, nullptr slice parameter."); \ - return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); \ - } \ +#define SLICE_PARAM_VALIDATE(slice) \ + do { \ + if (nullptr == slice) { \ + OLAP_LOG_WARNING("fail to find, nullptr slice parameter."); \ + return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); \ + } \ } while (0); SegmentGroup::SegmentGroup(int64_t tablet_id, const RowsetId& rowset_id, const TabletSchema* schema, - const std::string& rowset_path_prefix, Version version, - bool delete_flag, int32_t segment_group_id, - int32_t num_segments) + const std::string& rowset_path_prefix, Version version, bool delete_flag, + int32_t segment_group_id, int32_t num_segments) : _tablet_id(tablet_id), _rowset_id(rowset_id), _schema(schema), @@ -408,16 +407,14 @@ Status SegmentGroup::load_pb(const char* file, uint32_t seg_id) { FileHandler seg_file_handler; res = seg_file_handler.open(file, O_RDONLY); if (!res.ok()) { - LOG(WARNING) << "failed to open segment file. err=" << res - << ", file=" << file; + LOG(WARNING) << "failed to open segment file. err=" << res << ", file=" << file; return res; } res = seg_file_header.unserialize(&seg_file_handler); if (!res.ok()) { seg_file_handler.close(); - LOG(WARNING) << "fail to unserialize header. err=" << res - << ", path=" << file; + LOG(WARNING) << "fail to unserialize header. err=" << res << ", path=" << file; return res; } @@ -470,8 +467,8 @@ bool SegmentGroup::check() { return true; } -Status SegmentGroup::find_short_key(const RowCursor& key, RowCursor* helper_cursor, - bool find_last, RowBlockPosition* pos) const { +Status SegmentGroup::find_short_key(const RowCursor& key, RowCursor* helper_cursor, bool find_last, + RowBlockPosition* pos) const { SEGMENT_GROUP_PARAM_VALIDATE(); POS_PARAM_VALIDATE(pos); @@ -530,7 +527,7 @@ Status SegmentGroup::find_next_row_block(RowBlockPosition* pos, bool* eof) const } Status SegmentGroup::find_mid_point(const RowBlockPosition& low, const RowBlockPosition& high, - RowBlockPosition* output, uint32_t* dis) const { + RowBlockPosition* output, uint32_t* dis) const { *dis = compute_distance(low, high); if (*dis >= _index.count()) { return Status::OLAPInternalError(OLAP_ERR_INDEX_EOF); @@ -545,15 +542,14 @@ Status SegmentGroup::find_mid_point(const RowBlockPosition& low, const RowBlockP } Status SegmentGroup::find_prev_point(const RowBlockPosition& current, - RowBlockPosition* prev) const { + RowBlockPosition* prev) const { OLAPIndexOffset current_offset = _index.get_offset(current); OLAPIndexOffset prev_offset = _index.prev(current_offset); return _index.get_row_block_position(prev_offset, prev); } -Status SegmentGroup::advance_row_block(int64_t num_row_blocks, - RowBlockPosition* position) const { +Status SegmentGroup::advance_row_block(int64_t num_row_blocks, RowBlockPosition* position) const { SEGMENT_GROUP_PARAM_VALIDATE(); POS_PARAM_VALIDATE(position); @@ -791,7 +787,7 @@ Status SegmentGroup::copy_files_to(const std::string& dir) { // case 1: clone from old version be // case 2: upgrade to new version be Status SegmentGroup::convert_from_old_files(const std::string& snapshot_path, - std::vector* success_links) { + std::vector* success_links) { if (_empty) { // the segment group is empty, it does not have files, just return return Status::OK(); @@ -839,7 +835,7 @@ Status SegmentGroup::convert_from_old_files(const std::string& snapshot_path, } Status SegmentGroup::convert_to_old_files(const std::string& snapshot_path, - std::vector* success_links) { + std::vector* success_links) { if (_empty) { return Status::OK(); } @@ -926,7 +922,7 @@ Status SegmentGroup::remove_old_files(std::vector* links_to_remove) } Status SegmentGroup::link_segments_to_path(const std::string& dest_path, - const RowsetId& rowset_id) { + const RowsetId& rowset_id) { if (dest_path.empty()) { LOG(WARNING) << "dest path is empty, return error"; return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); @@ -1009,12 +1005,11 @@ std::string SegmentGroup::_construct_old_file_path(const std::string& path_prefi char file_path[OLAP_MAX_PATH_LEN]; if (_segment_group_id == -1) { snprintf(file_path, sizeof(file_path), "%s/%ld_%ld_%ld_%d%s", path_prefix.c_str(), - _tablet_id, _version.first, _version.second, segment_id, - suffix.c_str()); + _tablet_id, _version.first, _version.second, segment_id, suffix.c_str()); } else { snprintf(file_path, sizeof(file_path), "%s/%ld_%ld_%ld_%d_%d%s", path_prefix.c_str(), - _tablet_id, _version.first, _version.second, _segment_group_id, - segment_id, suffix.c_str()); + _tablet_id, _version.first, _version.second, _segment_group_id, segment_id, + suffix.c_str()); } return file_path; @@ -1025,9 +1020,8 @@ std::string SegmentGroup::_construct_err_sg_file_path(const std::string& path_pr int32_t segment_id, const std::string& suffix) const { char file_path[OLAP_MAX_PATH_LEN]; - snprintf(file_path, sizeof(file_path), "%s/%ld_%ld_%ld_%d%s", path_prefix.c_str(), - _tablet_id, _version.first, _version.second, segment_id, - suffix.c_str()); + snprintf(file_path, sizeof(file_path), "%s/%ld_%ld_%ld_%d%s", path_prefix.c_str(), _tablet_id, + _version.first, _version.second, segment_id, suffix.c_str()); return file_path; } diff --git a/be/src/olap/rowset/segment_group.h b/be/src/olap/rowset/segment_group.h index 8ea2ce3cd5..e2031b7dd7 100644 --- a/be/src/olap/rowset/segment_group.h +++ b/be/src/olap/rowset/segment_group.h @@ -49,8 +49,8 @@ class SegmentGroup { public: SegmentGroup(int64_t tablet_id, const RowsetId& rowset_id, const TabletSchema* tablet_schema, - const std::string& rowset_path_prefix, Version version, - bool delete_flag, int segment_group_id, int32_t num_segments); + const std::string& rowset_path_prefix, Version version, bool delete_flag, + int segment_group_id, int32_t num_segments); SegmentGroup(int64_t tablet_id, const RowsetId& rowset_id, const TabletSchema* tablet_schema, const std::string& rowset_path_prefix, bool delete_flag, int32_t segment_group_id, @@ -74,7 +74,7 @@ public: const std::vector>& zone_map_fields); Status add_zone_maps(std::vector>& zone_map_strings, - std::vector& null_vec); + std::vector& null_vec); const std::vector>& get_zone_maps() { return _zone_maps; @@ -89,7 +89,7 @@ public: // Finds position of first row block contain the smallest key equal // to or greater than 'key'. Returns true on success. Status find_short_key(const RowCursor& key, RowCursor* helper_cursor, bool find_last, - RowBlockPosition* position) const; + RowBlockPosition* position) const; // Returns position of the first row block in the index. Status find_first_row_block(RowBlockPosition* position) const; @@ -106,7 +106,7 @@ public: // the midpoint between those two positions. Returns the distance // between low and high as computed by ComputeDistance. Status find_mid_point(const RowBlockPosition& low, const RowBlockPosition& high, - RowBlockPosition* output, uint32_t* dis) const; + RowBlockPosition* output, uint32_t* dis) const; Status find_prev_point(const RowBlockPosition& current, RowBlockPosition* prev) const; @@ -152,9 +152,7 @@ public: bool delete_flag() const { return _delete_flag; } int32_t segment_group_id() const { return _segment_group_id; } - void set_segment_group_id(int32_t segment_group_id) { - _segment_group_id = segment_group_id; - } + void set_segment_group_id(int32_t segment_group_id) { _segment_group_id = segment_group_id; } PUniqueId load_id() const { return _load_id; } void set_load_id(const PUniqueId& load_id) { _load_id = load_id; } @@ -232,10 +230,10 @@ public: const RowsetId& rowset_id() { return _rowset_id; } Status convert_from_old_files(const std::string& snapshot_path, - std::vector* success_links); + std::vector* success_links); Status convert_to_old_files(const std::string& snapshot_path, - std::vector* success_links); + std::vector* success_links); Status remove_old_files(std::vector* links_to_remove); diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 7126d272d7..1d2606cadb 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -225,8 +225,8 @@ Status SegmentReader::init(bool is_using_cache) { return Status::OK(); } -Status SegmentReader::seek_to_block(uint32_t first_block, uint32_t last_block, - bool without_filter, uint32_t* next_block_id, bool* eof) { +Status SegmentReader::seek_to_block(uint32_t first_block, uint32_t last_block, bool without_filter, + uint32_t* next_block_id, bool* eof) { Status res = Status::OK(); if (!_is_data_loaded) { @@ -781,8 +781,7 @@ Status SegmentReader::_create_reader(size_t* buffer_size) { return Status::OK(); } -Status SegmentReader::_seek_to_block_directly(int64_t block_id, - const std::vector& cids) { +Status SegmentReader::_seek_to_block_directly(int64_t block_id, const std::vector& cids) { if (!_need_to_seek_block && block_id == _current_block_id) { // no need to execute seek return Status::OK(); diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h index d6cc119aba..2dcbe5c2a6 100644 --- a/be/src/olap/rowset/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -80,7 +80,7 @@ public: // block with next_block_id would read if get_block called again. // this field is used to set batch's limit when client found logical end is reach Status seek_to_block(uint32_t first_block, uint32_t last_block, bool without_filter, - uint32_t* next_block_id, bool* eof); + uint32_t* next_block_id, bool* eof); // get vector batch from this segment. // next_block_id: @@ -166,7 +166,7 @@ private: // Load the index, read the index of the required column into memory Status _load_index(bool is_using_cache); - // Read all the columns, the complete stream, (here just create the stream, because there is no mmap in the orc file, + // Read all the columns, the complete stream, (here just create the stream, because there is no mmap in the orc file, // it means the actual data is read, but there is no actual read here, just circle the required range) Status _read_all_data_streams(size_t* buffer_size); @@ -309,7 +309,7 @@ private: uint8_t* _include_blocks; uint32_t _remain_block; bool _need_block_filter; // Used in combination with include blocks, if none of them are in, no longer read - bool _is_using_mmap; // When this flag is true, use mmap to read the file + bool _is_using_mmap; // When this flag is true, use mmap to read the file bool _is_data_loaded; size_t _buffer_size; diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index 580e89f4c6..a52dde719a 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -208,7 +208,8 @@ Status BinaryDictPageDecoder::init() { // And then copy the strings corresponding to the codewords to the destination buffer const auto* type_info = get_scalar_type_info(); RETURN_IF_ERROR(ColumnVectorBatch::create(0, false, type_info, nullptr, &_batch)); - _data_page_decoder.reset(_bit_shuffle_ptr = new BitShufflePageDecoder(_data, _options)); + _data_page_decoder.reset( + _bit_shuffle_ptr = new BitShufflePageDecoder(_data, _options)); } else if (_encoding_type == PLAIN_ENCODING) { DCHECK_EQ(_encoding_type, PLAIN_ENCODING); _data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options)); @@ -237,7 +238,7 @@ void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder, StringRe _dict_word_info = dict_word_info; }; -Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr &dst) { +Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { if (_encoding_type == PLAIN_ENCODING) { dst = dst->convert_to_predicate_column_if_dictionary(); return _data_page_decoder->next_batch(n, dst); @@ -245,22 +246,24 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr // dictionary encoding DCHECK(_parsed); DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr"; - + if (PREDICT_FALSE(*n == 0 || _bit_shuffle_ptr->_cur_index >= _bit_shuffle_ptr->_num_elements)) { *n = 0; return Status::OK(); } - - size_t max_fetch = std::min(*n, static_cast(_bit_shuffle_ptr->_num_elements - _bit_shuffle_ptr->_cur_index)); + + size_t max_fetch = std::min(*n, static_cast(_bit_shuffle_ptr->_num_elements - + _bit_shuffle_ptr->_cur_index)); *n = max_fetch; - + const auto* data_array = reinterpret_cast(_bit_shuffle_ptr->_chunk.data); size_t start_index = _bit_shuffle_ptr->_cur_index; - dst->insert_many_dict_data(data_array, start_index, _dict_word_info, max_fetch, _dict_decoder->_num_elems); + dst->insert_many_dict_data(data_array, start_index, _dict_word_info, max_fetch, + _dict_decoder->_num_elems); _bit_shuffle_ptr->_cur_index += max_fetch; - + return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h index f5630ade02..fbe046caac 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.h +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h @@ -105,7 +105,7 @@ public: Status next_batch(size_t* n, ColumnBlockView* dst) override; - Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override; + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override; size_t count() const override { return _data_page_decoder->count(); } diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index 061884fb2b..e655baca9c 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -154,7 +154,6 @@ private: class BinaryPlainPageDecoder : public PageDecoder { public: - BinaryPlainPageDecoder(Slice data) : BinaryPlainPageDecoder(data, PageDecoderOptions()) {} BinaryPlainPageDecoder(Slice data, const PageDecoderOptions& options) @@ -230,7 +229,7 @@ public: return Status::OK(); } - Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override { + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { DCHECK(_parsed); if (PREDICT_FALSE(*n == 0 || _cur_idx >= _num_elems)) { *n = 0; @@ -241,13 +240,14 @@ public: uint32_t len_array[max_fetch]; uint32_t start_offset_array[max_fetch]; for (int i = 0; i < max_fetch; i++, _cur_idx++) { - const uint32_t start_offset = offset(_cur_idx); + const uint32_t start_offset = offset(_cur_idx); uint32_t len = offset(_cur_idx + 1) - start_offset; len_array[i] = len; start_offset_array[i] = start_offset; } - dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array, max_fetch); - + dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array, + max_fetch); + *n = max_fetch; return Status::OK(); }; @@ -280,10 +280,12 @@ public: } for (int i = 0; i < (int)_num_elems - 1; ++i) { - dict_word_info[i].size = (char*)dict_word_info[i+1].data - (char*)dict_word_info[i].data; + dict_word_info[i].size = + (char*)dict_word_info[i + 1].data - (char*)dict_word_info[i].data; } - dict_word_info[_num_elems-1].size = (data_begin + _offsets_pos) - (char*)dict_word_info[_num_elems-1].data; + dict_word_info[_num_elems - 1].size = + (data_begin + _offsets_pos) - (char*)dict_word_info[_num_elems - 1].data; } private: diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.h b/be/src/olap/rowset/segment_v2/binary_prefix_page.h index 7d7bcc9a92..bec5f3f7b4 100644 --- a/be/src/olap/rowset/segment_v2/binary_prefix_page.h +++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.h @@ -113,7 +113,7 @@ public: Status next_batch(size_t* n, ColumnBlockView* dst) override; - Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override { + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { return Status::NotSupported("binary prefix page not implement vec op now"); }; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index d39302f000..cc88552f9c 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -122,15 +122,17 @@ Status ColumnReader::init() { _bf_index_meta = &index_meta.bloom_filter_index(); break; default: - return Status::Corruption(strings::Substitute( - "Bad file $0: invalid column index type $1", _path_desc.filepath, index_meta.type())); + return Status::Corruption( + strings::Substitute("Bad file $0: invalid column index type $1", + _path_desc.filepath, index_meta.type())); } } // ArrayColumnWriter writes a single empty array and flushes. In this scenario, // the item writer doesn't write any data and the corresponding ordinal index is empty. if (_ordinal_index_meta == nullptr && !is_empty()) { - return Status::Corruption(strings::Substitute( - "Bad file $0: missing ordinal index for column $1", _path_desc.filepath, _meta.column_id())); + return Status::Corruption( + strings::Substitute("Bad file $0: missing ordinal index for column $1", + _path_desc.filepath, _meta.column_id())); } return Status::OK(); } @@ -243,7 +245,7 @@ Status ColumnReader::_get_filtered_pages(CondColumn* cond_column, CondColumn* de } } VLOG(1) << "total-pages: " << page_size << " not-filtered-pages: " << page_indexes->size() - << " filtered-percent:" << 1.0 - (page_indexes->size()*1.0)/(page_size*1.0); + << " filtered-percent:" << 1.0 - (page_indexes->size() * 1.0) / (page_size * 1.0); return Status::OK(); } @@ -581,7 +583,8 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has return Status::OK(); } -Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) { +Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { size_t curr_size = dst->byte_size(); size_t remaining = *n; *has_null = false; @@ -773,9 +776,9 @@ Status DefaultValueColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, b return Status::OK(); } -void DefaultValueColumnIterator::insert_default_data(vectorized::MutableColumnPtr &dst, size_t n) { +void DefaultValueColumnIterator::insert_default_data(vectorized::MutableColumnPtr& dst, size_t n) { vectorized::Int128 int128; - char* data_ptr = (char *) &int128; + char* data_ptr = (char*)&int128; size_t data_len = sizeof(int128); auto insert_column_data = [&]() { @@ -785,52 +788,54 @@ void DefaultValueColumnIterator::insert_default_data(vectorized::MutableColumnPt }; switch (_type_info->type()) { - case OLAP_FIELD_TYPE_OBJECT: - case OLAP_FIELD_TYPE_HLL:{ - dst->insert_many_defaults(n); - break; - } + case OLAP_FIELD_TYPE_OBJECT: + case OLAP_FIELD_TYPE_HLL: { + dst->insert_many_defaults(n); + break; + } - case OLAP_FIELD_TYPE_DATE: { - assert(_type_size == sizeof(FieldTypeTraits::CppType)); //uint24_t - std::string str = FieldTypeTraits::to_string(_mem_value); + case OLAP_FIELD_TYPE_DATE: { + assert(_type_size == sizeof(FieldTypeTraits::CppType)); //uint24_t + std::string str = FieldTypeTraits::to_string(_mem_value); - vectorized::VecDateTimeValue value; - value.from_date_str(str.c_str(), str.length()); - value.cast_to_date(); - //TODO: here is int128 = int64, here rely on the logic of little endian - int128 = binary_cast(value); - insert_column_data(); - break; - } - case OLAP_FIELD_TYPE_DATETIME: { - assert(_type_size == sizeof(FieldTypeTraits::CppType)); //int64_t - std::string str = FieldTypeTraits::to_string(_mem_value); + vectorized::VecDateTimeValue value; + value.from_date_str(str.c_str(), str.length()); + value.cast_to_date(); + //TODO: here is int128 = int64, here rely on the logic of little endian + int128 = binary_cast(value); + insert_column_data(); + break; + } + case OLAP_FIELD_TYPE_DATETIME: { + assert(_type_size == sizeof(FieldTypeTraits::CppType)); //int64_t + std::string str = FieldTypeTraits::to_string(_mem_value); - vectorized::VecDateTimeValue value; - value.from_date_str(str.c_str(), str.length()); - value.to_datetime(); + vectorized::VecDateTimeValue value; + value.from_date_str(str.c_str(), str.length()); + value.to_datetime(); - int128 = binary_cast(value); - insert_column_data(); - break; - } - case OLAP_FIELD_TYPE_DECIMAL: { - assert(_type_size == sizeof(FieldTypeTraits::CppType)); //decimal12_t - decimal12_t *d = (decimal12_t *) _mem_value; - int128 = DecimalV2Value(d->integer, d->fraction).value(); - insert_column_data(); - break; - } - default: { - data_ptr = (char *) _mem_value; - data_len = _type_size; - insert_column_data(); - } + int128 = binary_cast(value); + insert_column_data(); + break; + } + case OLAP_FIELD_TYPE_DECIMAL: { + assert(_type_size == + sizeof(FieldTypeTraits::CppType)); //decimal12_t + decimal12_t* d = (decimal12_t*)_mem_value; + int128 = DecimalV2Value(d->integer, d->fraction).value(); + insert_column_data(); + break; + } + default: { + data_ptr = (char*)_mem_value; + data_len = _type_size; + insert_column_data(); + } } } -Status DefaultValueColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) { +Status DefaultValueColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { if (_is_default_value_null) { *has_null = true; dst->insert_many_defaults(*n); diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index 87770acdb1..7ebf917b45 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -118,7 +118,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* length_options.meta->set_unique_id(2); length_options.meta->set_type(length_type); length_options.meta->set_is_nullable(false); - length_options.meta->set_length(get_scalar_type_info()->size()); + length_options.meta->set_length( + get_scalar_type_info()->size()); length_options.meta->set_encoding(DEFAULT_ENCODING); length_options.meta->set_compression(LZ4F); @@ -145,7 +146,8 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_options.meta->set_unique_id(3); null_options.meta->set_type(null_type); null_options.meta->set_is_nullable(false); - null_options.meta->set_length(get_scalar_type_info()->size()); + null_options.meta->set_length( + get_scalar_type_info()->size()); null_options.meta->set_encoding(DEFAULT_ENCODING); null_options.meta->set_compression(LZ4F); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 8b7cb60b32..5e5e3d3019 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -309,9 +309,7 @@ public: private: Status put_extra_info_in_page(DataPageFooterPB* header) override; Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记 - bool has_empty_items() const { - return _item_writer->get_next_rowid() == 0; - } + bool has_empty_items() const { return _item_writer->get_next_rowid() == 0; } private: std::unique_ptr _length_writer; diff --git a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h index df8fe06a1e..909aa35b76 100644 --- a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h +++ b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h @@ -161,7 +161,7 @@ public: return Status::OK(); } - Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override { + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { return Status::NotSupported("frame page not implement vec op now"); }; diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 77ddb1776f..0cb2dafa39 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -32,7 +32,7 @@ struct PageBuilderOptions { size_t dict_page_size = DEFAULT_PAGE_SIZE; }; -struct PageDecoderOptions { }; +struct PageDecoderOptions {}; } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp index ba6ab0f7e4..b00e4db8cc 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -35,7 +35,8 @@ void OrdinalIndexWriter::append_entry(ordinal_t ordinal, const PagePointer& data } Status OrdinalIndexWriter::finish(fs::WritableBlock* wblock, ColumnIndexMetaPB* meta) { - CHECK(_page_builder->count() > 0) << "no entry has been added, filepath=" << wblock->path_desc().filepath; + CHECK(_page_builder->count() > 0) + << "no entry has been added, filepath=" << wblock->path_desc().filepath; meta->set_type(ORDINAL_INDEX); BTreeMetaPB* root_page_meta = meta->mutable_ordinal_index()->mutable_root_page(); diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index ab5b482e29..3b0861f6b5 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -82,7 +82,7 @@ public: // allocated in the column_vector_view's mem_pool. virtual Status next_batch(size_t* n, ColumnBlockView* dst) = 0; - virtual Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) { + virtual Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { return Status::NotSupported("not implement vec op now"); } diff --git a/be/src/olap/rowset/segment_v2/page_io.cpp b/be/src/olap/rowset/segment_v2/page_io.cpp index 739cde1597..fe7d3c32e8 100644 --- a/be/src/olap/rowset/segment_v2/page_io.cpp +++ b/be/src/olap/rowset/segment_v2/page_io.cpp @@ -47,7 +47,7 @@ Status PageIO::compress_page_body(const BlockCompressionCodec* codec, double min Slice compressed_slice(buf); RETURN_IF_ERROR(codec->compress(body, &compressed_slice)); buf.resize(compressed_slice.get_size()); - + double space_saving = 1.0 - static_cast(buf.size()) / uncompressed_size; // return compressed body only when it saves more than min_space_saving if (space_saving > 0 && space_saving >= min_space_saving) { @@ -116,8 +116,10 @@ Status PageIO::read_and_decompress_page(const PageReadOptions& opts, PageHandle* auto cache = StoragePageCache::instance(); PageCacheHandle cache_handle; - StoragePageCache::CacheKey cache_key(opts.rblock->path_desc().filepath, opts.page_pointer.offset); - if (opts.use_page_cache && cache->is_cache_available(opts.type) && cache->lookup(cache_key, &cache_handle, opts.type)) { + StoragePageCache::CacheKey cache_key(opts.rblock->path_desc().filepath, + opts.page_pointer.offset); + if (opts.use_page_cache && cache->is_cache_available(opts.type) && + cache->lookup(cache_key, &cache_handle, opts.type)) { // we find page in cache, use it *handle = PageHandle(std::move(cache_handle)); opts.stats->cached_pages_num++; diff --git a/be/src/olap/rowset/segment_v2/parsed_page.h b/be/src/olap/rowset/segment_v2/parsed_page.h index 7299ccf593..1b05e5dbd3 100644 --- a/be/src/olap/rowset/segment_v2/parsed_page.h +++ b/be/src/olap/rowset/segment_v2/parsed_page.h @@ -67,8 +67,8 @@ struct ParsedPage { return Status::OK(); } - ~ParsedPage() { - delete data_decoder; + ~ParsedPage() { + delete data_decoder; data_decoder = nullptr; } diff --git a/be/src/olap/rowset/segment_v2/rle_page.h b/be/src/olap/rowset/segment_v2/rle_page.h index 52a9613cf4..5944a47c4f 100644 --- a/be/src/olap/rowset/segment_v2/rle_page.h +++ b/be/src/olap/rowset/segment_v2/rle_page.h @@ -230,7 +230,7 @@ public: return Status::OK(); } - Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override { + Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) override { DCHECK(_parsed); if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) { *n = 0; diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 857d674470..a6fc9fe7fe 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -59,12 +59,13 @@ using SegmentSharedPtr = std::shared_ptr; // change finished, client should disable all cached Segment for old TabletSchema. class Segment : public std::enable_shared_from_this { public: - static Status open(const FilePathDesc& path_desc, uint32_t segment_id, const TabletSchema* tablet_schema, - std::shared_ptr* output); + static Status open(const FilePathDesc& path_desc, uint32_t segment_id, + const TabletSchema* tablet_schema, std::shared_ptr* output); ~Segment(); - Status new_iterator(const Schema& schema, const StorageReadOptions& read_options, std::unique_ptr* iter); + Status new_iterator(const Schema& schema, const StorageReadOptions& read_options, + std::unique_ptr* iter); uint64_t id() const { return _segment_id; } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 5eebb30a88..857c679455 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -46,8 +46,8 @@ SegmentWriter::SegmentWriter(fs::WritableBlock* wblock, uint32_t segment_id, _max_row_per_segment(max_row_per_segment), _opts(opts), _wblock(wblock), - _mem_tracker( - MemTracker::create_virtual_tracker(-1, "SegmentWriter:Segment-" + std::to_string(segment_id))), + _mem_tracker(MemTracker::create_virtual_tracker( + -1, "SegmentWriter:Segment-" + std::to_string(segment_id))), _olap_data_convertor(tablet_schema) { CHECK_NOTNULL(_wblock); size_t num_short_key_column = _tablet_schema->num_short_key_columns(); @@ -151,22 +151,22 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po RETURN_IF_ERROR(_index_builder->add_item(encoded_key)); key_column_fields.clear(); } - + _row_count += num_rows; _olap_data_convertor.clear_source_content(); return Status::OK(); } int64_t SegmentWriter::max_row_to_add(size_t row_avg_size_in_bytes) { - int64_t size_rows = ((int64_t)MAX_SEGMENT_SIZE - (int64_t)estimate_segment_size()) / row_avg_size_in_bytes; + int64_t size_rows = + ((int64_t)MAX_SEGMENT_SIZE - (int64_t)estimate_segment_size()) / row_avg_size_in_bytes; int64_t count_rows = (int64_t)_max_row_per_segment - _row_count; return std::min(size_rows, count_rows); } - -std::string SegmentWriter::encode_short_keys( - const std::vector key_column_fields, bool null_first) { +std::string SegmentWriter::encode_short_keys(const std::vector key_column_fields, + bool null_first) { size_t num_key_columns = _tablet_schema->num_short_key_columns(); assert(key_column_fields.size() == num_key_columns && _short_key_coders.size() == num_key_columns && @@ -184,8 +184,7 @@ std::string SegmentWriter::encode_short_keys( continue; } encoded_keys.push_back(KEY_NORMAL_MARKER); - _short_key_coders[cid]->encode_ascending(field, _short_key_index_size[cid], - &encoded_keys); + _short_key_coders[cid]->encode_ascending(field, _short_key_index_size[cid], &encoded_keys); } return encoded_keys; } @@ -229,8 +228,9 @@ uint64_t SegmentWriter::estimate_segment_size() { Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size) { // check disk capacity - if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t) estimate_segment_size())) { - return Status::InternalError(fmt::format("disk {} exceed capacity limit.", _data_dir->path_hash())); + if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t)estimate_segment_size())) { + return Status::InternalError( + fmt::format("disk {} exceed capacity limit.", _data_dir->path_hash())); } for (auto& column_writer : _column_writers) { RETURN_IF_ERROR(column_writer->finish()); @@ -326,4 +326,4 @@ Status SegmentWriter::_write_raw_data(const std::vector& slices) { } } // namespace segment_v2 -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index cc047e19c9..ab928b51e1 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -60,9 +60,8 @@ struct SegmentWriterOptions { class SegmentWriter { public: explicit SegmentWriter(fs::WritableBlock* block, uint32_t segment_id, - const TabletSchema* tablet_schema, - DataDir* data_dir, uint32_t max_row_per_segment, - const SegmentWriterOptions& opts); + const TabletSchema* tablet_schema, DataDir* data_dir, + uint32_t max_row_per_segment, const SegmentWriterOptions& opts); ~SegmentWriter(); Status init(uint32_t write_mbytes_per_sec); @@ -80,7 +79,8 @@ public: Status finalize(uint64_t* segment_file_size, uint64_t* index_size); - static void init_column_meta(ColumnMetaPB* meta, uint32_t* column_id, const TabletColumn& column); + static void init_column_meta(ColumnMetaPB* meta, uint32_t* column_id, + const TabletColumn& column); private: DISALLOW_COPY_AND_ASSIGN(SegmentWriter); @@ -113,10 +113,10 @@ private: uint32_t _row_count = 0; vectorized::OlapBlockDataConvertor _olap_data_convertor; - std::vector< const KeyCoder* > _short_key_coders; - std::vector< uint16_t > _short_key_index_size; + std::vector _short_key_coders; + std::vector _short_key_index_size; size_t _short_key_row_pos = 0; }; } // namespace segment_v2 -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/olap/rowset/segment_writer.cpp b/be/src/olap/rowset/segment_writer.cpp index be71be2115..26bb0544c0 100644 --- a/be/src/olap/rowset/segment_writer.cpp +++ b/be/src/olap/rowset/segment_writer.cpp @@ -220,7 +220,7 @@ Status SegmentWriter::finalize(uint32_t* segment_file_size) { } } if (!(res = file_handle.open_with_mode(_file_name, O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR))) { + S_IRUSR | S_IWUSR))) { LOG(WARNING) << "fail to open file. [file_name=" << _file_name << "]"; return res; } @@ -237,7 +237,7 @@ Status SegmentWriter::finalize(uint32_t* segment_file_size) { } if (!(res = file_handle.open_with_mode(_file_name, O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR))) { + S_IRUSR | S_IWUSR))) { LOG(WARNING) << "fail to open file. [file_name=" << _file_name << "]"; return res; } diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index ce32620cf2..a36547e475 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -104,7 +104,8 @@ public: static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type); - static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(FieldType type, bool is_null = false); + static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(FieldType type, + bool is_null = false); const std::vector& columns() const { return _cols; } diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index 1d55b29e0b..ec1f2dc0f2 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -61,7 +61,7 @@ public: const SchemaMapping& get_schema_mapping() const { return _schema_mapping; } Status change_row_block(const RowBlock* ref_block, int32_t data_version, - RowBlock* mutable_block, uint64_t* filtered_rows) const; + RowBlock* mutable_block, uint64_t* filtered_rows) const; private: // @brief column-mapping specification of new schema @@ -94,9 +94,8 @@ public: SchemaChange() : _filtered_rows(0), _merged_rows(0) {} virtual ~SchemaChange() = default; - virtual Status process(RowsetReaderSharedPtr rowset_reader, - RowsetWriter* new_rowset_builder, TabletSharedPtr tablet, - TabletSharedPtr base_tablet) = 0; + virtual Status process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_builder, + TabletSharedPtr tablet, TabletSharedPtr base_tablet) = 0; void add_filtered_rows(uint64_t filtered_rows) { _filtered_rows += filtered_rows; } @@ -122,7 +121,7 @@ public: ~LinkedSchemaChange() {} virtual Status process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer, - TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override; + TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override; private: const RowBlockChanger& _row_block_changer; @@ -138,7 +137,7 @@ public: virtual ~SchemaChangeDirectly(); virtual Status process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer, - TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override; + TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override; private: const RowBlockChanger& _row_block_changer; @@ -157,9 +156,8 @@ public: size_t memory_limitation); virtual ~SchemaChangeWithSorting(); - virtual Status process(RowsetReaderSharedPtr rowset_reader, - RowsetWriter* new_rowset_builder, TabletSharedPtr new_tablet, - TabletSharedPtr base_tablet) override; + virtual Status process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_builder, + TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override; private: bool _internal_sorting(const std::vector& row_block_arr, @@ -186,23 +184,22 @@ public: } Status schema_version_convert(TabletSharedPtr base_tablet, TabletSharedPtr new_tablet, - RowsetSharedPtr* base_rowset, RowsetSharedPtr* new_rowset); + RowsetSharedPtr* base_rowset, RowsetSharedPtr* new_rowset); // schema change v2, it will not set alter task in base tablet Status process_alter_tablet_v2(const TAlterTabletReqV2& request); private: - // Check the status of schema change and clear information between "a pair" of Schema change tables // Since A->B's schema_change information for A will be overwritten in subsequent processing (no extra cleanup here) // Returns: // Success: If there is historical information, then clear it if there is no problem; or no historical information // Failure: otherwise, if there is history information and it cannot be emptied (version has not been completed) Status _check_and_clear_schema_change_info(TabletSharedPtr tablet, - const TAlterTabletReq& request); + const TAlterTabletReq& request); Status _get_versions_to_be_changed(TabletSharedPtr base_tablet, - std::vector* versions_to_be_changed); + std::vector* versions_to_be_changed); struct AlterMaterializedViewParam { std::string column_name; @@ -225,16 +222,14 @@ private: Status _convert_historical_rowsets(const SchemaChangeParams& sc_params); - static Status _parse_request( - TabletSharedPtr base_tablet, TabletSharedPtr new_tablet, RowBlockChanger* rb_changer, - bool* sc_sorting, bool* sc_directly, - const std::unordered_map& - materialized_function_map); + static Status _parse_request(TabletSharedPtr base_tablet, TabletSharedPtr new_tablet, + RowBlockChanger* rb_changer, bool* sc_sorting, bool* sc_directly, + const std::unordered_map& + materialized_function_map); // Initialization Settings for creating a default value static Status _init_column_mapping(ColumnMapping* column_mapping, - const TabletColumn& column_schema, - const std::string& value); + const TabletColumn& column_schema, const std::string& value); private: SchemaChangeHandler(); @@ -245,4 +240,3 @@ private: using RowBlockDeleter = std::function; } // namespace doris - diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 609159bdb5..5b62c92e24 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -58,7 +58,7 @@ void SegmentLoader::_insert(const SegmentLoader::CacheKey& key, SegmentLoader::C } Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, - SegmentCacheHandle* cache_handle, bool use_cache) { + SegmentCacheHandle* cache_handle, bool use_cache) { SegmentLoader::CacheKey cache_key(rowset->rowset_id()); if (_lookup(cache_key, cache_handle)) { cache_handle->owned = false; diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index c155209614..8541ae6ed3 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -46,16 +46,13 @@ class SegmentCacheHandle; using BetaRowsetSharedPtr = std::shared_ptr; class SegmentLoader { public: - // The cache key or segment lru cache struct CacheKey { CacheKey(RowsetId rowset_id_) : rowset_id(rowset_id_) {} RowsetId rowset_id; // Encode to a flat binary which can be used as LRUCache's key - std::string encode() const { - return rowset_id.to_string(); - } + std::string encode() const { return rowset_id.to_string(); } }; // The cache value of segment lru cache. @@ -84,7 +81,8 @@ public: // Load segments of "rowset", return the "cache_handle" which contains segments. // If use_cache is true, it will be loaded from _cache. - Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, bool use_cache = false); + Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, + bool use_cache = false); // Try to prune the segment cache if expired. Status prune(); @@ -125,7 +123,7 @@ public: CHECK(!owned); // last_visit_time is set when release. // because it only be needed when pruning. - ((SegmentLoader::CacheValue*) _cache->value(_handle))->last_visit_time = UnixMillis(); + ((SegmentLoader::CacheValue*)_cache->value(_handle))->last_visit_time = UnixMillis(); _cache->release(_handle); } } @@ -147,9 +145,9 @@ public: std::vector& get_segments() { if (owned) { - return segments; + return segments; } else { - return ((SegmentLoader::CacheValue*) _cache->value(_handle))->segments; + return ((SegmentLoader::CacheValue*)_cache->value(_handle))->segments; } } diff --git a/be/src/olap/serialize.h b/be/src/olap/serialize.h index 01feeb3a9c..28c424bd1f 100644 --- a/be/src/olap/serialize.h +++ b/be/src/olap/serialize.h @@ -64,9 +64,9 @@ inline Status read_var_signed(ReadOnlyFileStream* stream, int64_t* value) { return res; } -// The bit_width in RunLengthIntegerWriter is all 5bit encoding, -// so it supports up to 2^5=32 bit lengths. However, it needs to represent at most 1~64 bits, -// a total of 64 bit lengths, so in 64 bit lengths Take 32 types. +// The bit_width in RunLengthIntegerWriter is all 5bit encoding, +// so it supports up to 2^5=32 bit lengths. However, it needs to represent at most 1~64 bits, +// a total of 64 bit lengths, so in 64 bit lengths Take 32 types. // The remaining 32 bit lengths that are not in these 32 types are aligned up to the nearest bit length. // FixedBitSize gives 32 bit lengths enum FixedBitSize { @@ -182,4 +182,3 @@ inline bool is_safe_subtract(int64_t left, int64_t right) { } // namespace ser } // namespace doris - diff --git a/be/src/olap/skiplist.h b/be/src/olap/skiplist.h index 0e28d3c9cc..fe691f8cf8 100644 --- a/be/src/olap/skiplist.h +++ b/be/src/olap/skiplist.h @@ -460,4 +460,3 @@ bool SkipList::Find(const Key& key, Hint* hint) const { } } // namespace doris - diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index c1c38ac6bc..4b9be15ef5 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -62,7 +62,7 @@ SnapshotManager* SnapshotManager::instance() { } Status SnapshotManager::make_snapshot(const TSnapshotRequest& request, string* snapshot_path, - bool* allow_incremental_clone) { + bool* allow_incremental_clone) { SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); Status res = Status::OK(); if (snapshot_path == nullptr) { @@ -121,13 +121,14 @@ Status SnapshotManager::release_snapshot(const string& snapshot_path) { // For now, alpha and beta rowset meta have same fields, so we can just use // AlphaRowsetMeta here. Status SnapshotManager::convert_rowset_ids(const FilePathDesc& clone_dir_desc, int64_t tablet_id, - const int32_t& schema_hash) { + const int32_t& schema_hash) { SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); Status res = Status::OK(); // check clone dir existed if (!FileUtils::check_exist(clone_dir_desc.filepath)) { res = Status::OLAPInternalError(OLAP_ERR_DIR_NOT_EXIST); - LOG(WARNING) << "clone dir not existed when convert rowsetids. clone_dir=" << clone_dir_desc.debug_string(); + LOG(WARNING) << "clone dir not existed when convert rowsetids. clone_dir=" + << clone_dir_desc.debug_string(); return res; } @@ -192,9 +193,9 @@ Status SnapshotManager::convert_rowset_ids(const FilePathDesc& clone_dir_desc, i } Status SnapshotManager::_rename_rowset_id(const RowsetMetaPB& rs_meta_pb, - const FilePathDesc& new_path_desc, TabletSchema& tablet_schema, - const RowsetId& rowset_id, - RowsetMetaPB* new_rs_meta_pb) { + const FilePathDesc& new_path_desc, + TabletSchema& tablet_schema, const RowsetId& rowset_id, + RowsetMetaPB* new_rs_meta_pb) { Status res = Status::OK(); // TODO use factory to obtain RowsetMeta when SnapshotManager::convert_rowset_ids supports beta rowset // TODO(cmy): now we only has AlphaRowsetMeta, and no BetaRowsetMeta. @@ -248,7 +249,7 @@ Status SnapshotManager::_rename_rowset_id(const RowsetMetaPB& rs_meta_pb, // get snapshot path: curtime.seq.timeout // eg: 20190819221234.3.86400 Status SnapshotManager::_calc_snapshot_id_path(const TabletSharedPtr& tablet, int64_t timeout_s, - string* out_path) { + string* out_path) { Status res = Status::OK(); if (out_path == nullptr) { LOG(WARNING) << "output parameter cannot be null"; @@ -300,9 +301,9 @@ Status SnapshotManager::_link_index_and_data_files( } Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet, - const TSnapshotRequest& request, - string* snapshot_path, - bool* allow_incremental_clone) { + const TSnapshotRequest& request, + string* snapshot_path, + bool* allow_incremental_clone) { int32_t snapshot_version = request.preferred_snapshot_version; LOG(INFO) << "receive a make snapshot request" << ", request detail is " << apache::thrift::ThriftDebugString(request) @@ -338,7 +339,8 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet FileUtils::remove_all(schema_full_path_desc.filepath); } - RETURN_WITH_WARN_IF_ERROR(FileUtils::create_dir(schema_full_path_desc.filepath), Status::OLAPInternalError(OLAP_ERR_CANNOT_CREATE_DIR), + RETURN_WITH_WARN_IF_ERROR(FileUtils::create_dir(schema_full_path_desc.filepath), + Status::OLAPInternalError(OLAP_ERR_CANNOT_CREATE_DIR), "create path " + schema_full_path_desc.filepath + " failed"); string snapshot_id; @@ -444,7 +446,7 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet // Clear it for safety reason. // Whether it is incremental or full snapshot, rowset information is stored in rs_meta. new_tablet_meta->revise_rs_metas(std::move(rs_metas)); - + if (snapshot_version == g_Types_constants.TSNAPSHOT_REQ_VERSION2) { res = new_tablet_meta->save(header_path); } else { diff --git a/be/src/olap/snapshot_manager.h b/be/src/olap/snapshot_manager.h index d5560ddf45..75c00180f5 100644 --- a/be/src/olap/snapshot_manager.h +++ b/be/src/olap/snapshot_manager.h @@ -49,7 +49,7 @@ public: /// snapshot_path: out param, the dir of snapshot /// allow_incremental_clone: out param, true if it is an incremental clone Status make_snapshot(const TSnapshotRequest& request, std::string* snapshot_path, - bool* allow_incremental_clone); + bool* allow_incremental_clone); FilePathDesc get_schema_hash_full_path(const TabletSharedPtr& ref_tablet, const FilePathDesc& location_desc) const; @@ -61,7 +61,7 @@ public: static SnapshotManager* instance(); Status convert_rowset_ids(const FilePathDesc& clone_dir_desc, int64_t tablet_id, - const int32_t& schema_hash); + const int32_t& schema_hash); private: SnapshotManager() : _snapshot_base_id(0) { @@ -70,25 +70,24 @@ private: } Status _calc_snapshot_id_path(const TabletSharedPtr& tablet, int64_t timeout_s, - std::string* out_path); + std::string* out_path); std::string _get_header_full_path(const TabletSharedPtr& ref_tablet, const std::string& schema_hash_path) const; Status _link_index_and_data_files(const FilePathDesc& header_path_desc, - const TabletSharedPtr& ref_tablet, - const std::vector& consistent_rowsets); + const TabletSharedPtr& ref_tablet, + const std::vector& consistent_rowsets); Status _create_snapshot_files(const TabletSharedPtr& ref_tablet, - const TSnapshotRequest& request, std::string* snapshot_path, - bool* allow_incremental_clone); + const TSnapshotRequest& request, std::string* snapshot_path, + bool* allow_incremental_clone); - Status _prepare_snapshot_dir(const TabletSharedPtr& ref_tablet, - std::string* snapshot_id_path); + Status _prepare_snapshot_dir(const TabletSharedPtr& ref_tablet, std::string* snapshot_id_path); Status _rename_rowset_id(const RowsetMetaPB& rs_meta_pb, const FilePathDesc& new_path_desc, - TabletSchema& tablet_schema, const RowsetId& next_id, - RowsetMetaPB* new_rs_meta_pb); + TabletSchema& tablet_schema, const RowsetId& next_id, + RowsetMetaPB* new_rs_meta_pb); private: static SnapshotManager* _s_instance; @@ -102,4 +101,3 @@ private: }; // SnapshotManager } // namespace doris - diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 9816b1b6b5..8fd7b0997a 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -145,9 +145,8 @@ StorageEngine::StorageEngine(const EngineOptions& options) std::lock_guard lock(_gc_mutex); return _unused_rowsets.size(); }); - REGISTER_HOOK_METRIC(compaction_mem_consumption, [this]() { - return _compaction_mem_tracker->consumption(); - }); + REGISTER_HOOK_METRIC(compaction_mem_consumption, + [this]() { return _compaction_mem_tracker->consumption(); }); REGISTER_HOOK_METRIC(schema_change_mem_consumption, [this]() { return _schema_change_mem_tracker->consumption(); }); } @@ -347,7 +346,7 @@ template std::vector StorageEngine::get_stores(); template std::vector StorageEngine::get_stores(); Status StorageEngine::get_all_data_dir_info(std::vector* data_dir_infos, - bool need_update) { + bool need_update) { Status res = Status::OK(); data_dir_infos->clear(); @@ -487,8 +486,8 @@ std::vector StorageEngine::get_stores_for_create_tablet( if (it.second->is_used()) { if (_available_storage_medium_type_count == 1 || it.second->storage_medium() == storage_medium || - (it.second->storage_medium() == TStorageMedium::REMOTE_CACHE - && FilePathDesc::is_remote(storage_medium))) { + (it.second->storage_medium() == TStorageMedium::REMOTE_CACHE && + FilePathDesc::is_remote(storage_medium))) { stores.push_back(it.second); } } @@ -703,7 +702,8 @@ Status StorageEngine::start_trash_sweep(double* usage, bool ignore_guard) { FilePathDescStream trash_path_desc_s; trash_path_desc_s << info.path_desc << TRASH_PREFIX; FilePathDesc trash_path_desc = trash_path_desc_s.path_desc(); - curr_res = _do_sweep(trash_path_desc, local_now, curr_usage > guard_space ? 0 : trash_expire); + curr_res = + _do_sweep(trash_path_desc, local_now, curr_usage > guard_space ? 0 : trash_expire); if (!curr_res.ok()) { LOG(WARNING) << "failed to sweep trash. path=" << trash_path_desc.filepath << ", err_code=" << curr_res; @@ -800,7 +800,8 @@ void StorageEngine::_clean_unused_txns() { std::set tablet_infos; _txn_manager->get_all_related_tablets(&tablet_infos); for (auto& tablet_info : tablet_infos) { - TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.tablet_uid, true); + TabletSharedPtr tablet = + _tablet_manager->get_tablet(tablet_info.tablet_id, tablet_info.tablet_uid, true); if (tablet == nullptr) { // TODO(ygl) : should check if tablet still in meta, it's a improvement // case 1: tablet still in meta, just remove from memory @@ -815,7 +816,7 @@ void StorageEngine::_clean_unused_txns() { } Status StorageEngine::_do_sweep(const FilePathDesc& scan_root_desc, const time_t& local_now, - const int32_t expire) { + const int32_t expire) { Status res = Status::OK(); if (!FileUtils::check_exist(scan_root_desc.filepath)) { // dir not existed. no need to sweep trash. @@ -853,30 +854,36 @@ Status StorageEngine::_do_sweep(const FilePathDesc& scan_root_desc, const time_t std::string storage_name_path = path_name + "/" + STORAGE_NAME; if (scan_root_desc.is_remote() && FileUtils::check_exist(storage_name_path)) { FilePathDesc remote_path_desc = scan_root_desc; - if (!env_util::read_file_to_string(Env::Default(), storage_name_path, &(remote_path_desc.storage_name)).ok()) { + if (!env_util::read_file_to_string(Env::Default(), storage_name_path, + &(remote_path_desc.storage_name)) + .ok()) { LOG(WARNING) << "read storage_name failed: " << storage_name_path; continue; } boost::algorithm::trim(remote_path_desc.storage_name); - std::shared_ptr storage_backend = StorageBackendMgr::instance()-> - get_storage_backend(remote_path_desc.storage_name); + std::shared_ptr storage_backend = + StorageBackendMgr::instance()->get_storage_backend( + remote_path_desc.storage_name); // if storage_backend is nullptr, the remote storage is invalid. // Only the local path need to be removed. if (storage_backend != nullptr) { std::string remote_root_path; if (!StorageBackendMgr::instance()->get_root_path( - remote_path_desc.storage_name, &remote_root_path)) { - LOG(WARNING) << "read storage root_path failed: " << remote_path_desc.storage_name; + remote_path_desc.storage_name, &remote_root_path)) { + LOG(WARNING) << "read storage root_path failed: " + << remote_path_desc.storage_name; continue; } remote_path_desc.remote_path = remote_root_path + TRASH_PREFIX; std::filesystem::path local_path(path_name); std::stringstream remote_file_stream; - remote_file_stream << remote_path_desc.remote_path << "/" << local_path.filename().string(); + remote_file_stream << remote_path_desc.remote_path << "/" + << local_path.filename().string(); Status ret = storage_backend->rmdir(remote_file_stream.str()); if (!ret.ok()) { - LOG(WARNING) << "fail to remove file or directory. path=" << remote_file_stream.str() - << ", error=" << ret.to_string(); + LOG(WARNING) + << "fail to remove file or directory. path=" + << remote_file_stream.str() << ", error=" << ret.to_string(); res = Status::OLAPInternalError(OLAP_ERR_OS_ERROR); continue; } @@ -895,7 +902,8 @@ Status StorageEngine::_do_sweep(const FilePathDesc& scan_root_desc, const time_t } } } catch (...) { - LOG(WARNING) << "Exception occur when scan directory. path_desc=" << scan_root_desc.debug_string(); + LOG(WARNING) << "Exception occur when scan directory. path_desc=" + << scan_root_desc.debug_string(); res = Status::OLAPInternalError(OLAP_ERR_IO_ERROR); } @@ -965,7 +973,7 @@ Status StorageEngine::create_tablet(const TCreateTabletReq& request) { } Status StorageEngine::obtain_shard_path(TStorageMedium::type storage_medium, - std::string* shard_path, DataDir** store) { + std::string* shard_path, DataDir** store) { LOG(INFO) << "begin to process obtain root path. storage_medium=" << storage_medium; if (shard_path == nullptr) { @@ -997,7 +1005,7 @@ Status StorageEngine::obtain_shard_path(TStorageMedium::type storage_medium, } Status StorageEngine::load_header(const string& shard_path, const TCloneReq& request, - bool restore) { + bool restore) { LOG(INFO) << "begin to process load headers." << "tablet_id=" << request.tablet_id << ", schema_hash=" << request.schema_hash; Status res = Status::OK(); @@ -1068,8 +1076,7 @@ Status StorageEngine::execute_task(EngineTask* task) { std::vector related_tablets; std::vector> wrlocks; for (TabletInfo& tablet_info : tablet_infos) { - TabletSharedPtr tablet = - _tablet_manager->get_tablet(tablet_info.tablet_id); + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_info.tablet_id); if (tablet != nullptr) { related_tablets.push_back(tablet); wrlocks.push_back(std::unique_lock(tablet->get_header_lock())); @@ -1099,8 +1106,7 @@ Status StorageEngine::execute_task(EngineTask* task) { std::vector related_tablets; std::vector> wrlocks; for (TabletInfo& tablet_info : tablet_infos) { - TabletSharedPtr tablet = - _tablet_manager->get_tablet(tablet_info.tablet_id); + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_info.tablet_id); if (tablet != nullptr) { related_tablets.push_back(tablet); wrlocks.push_back(std::unique_lock(tablet->get_header_lock())); diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 5b6760252f..b3768cccf3 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -117,7 +117,7 @@ public: // @param [out] shard_path choose an available root_path to clone new tablet // @return error code Status obtain_shard_path(TStorageMedium::type storage_medium, std::string* shared_path, - DataDir** store); + DataDir** store); // Load new tablet to make it effective. // @@ -126,7 +126,7 @@ public: // @param [in] restore whether we're restoring a tablet from trash // @return OLAP_SUCCESS if load tablet success Status load_header(const std::string& shard_path, const TCloneReq& request, - bool restore = false); + bool restore = false); void register_report_listener(TaskWorkerPool* listener); void deregister_report_listener(TaskWorkerPool* listener); @@ -182,7 +182,9 @@ public: std::shared_ptr compaction_mem_tracker() { return _compaction_mem_tracker; } std::shared_ptr tablet_mem_tracker() { return _tablet_mem_tracker; } std::shared_ptr schema_change_mem_tracker() { return _schema_change_mem_tracker; } - std::shared_ptr storage_migration_mem_tracker() { return _storage_migration_mem_tracker; } + std::shared_ptr storage_migration_mem_tracker() { + return _storage_migration_mem_tracker; + } std::shared_ptr clone_mem_tracker() { return _clone_mem_tracker; } std::shared_ptr batch_load_mem_tracker() { return _batch_load_mem_tracker; } std::shared_ptr consistency_mem_tracker() { return _consistency_mem_tracker; } @@ -216,7 +218,7 @@ private: void _clean_unused_rowset_metas(); Status _do_sweep(const FilePathDesc& scan_root_desc, const time_t& local_tm_now, - const int32_t expire); + const int32_t expire); // All these xxx_callback() functions are for Background threads // unused rowset monitor thread @@ -397,4 +399,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/storage_migration_v2.cpp b/be/src/olap/storage_migration_v2.cpp index a0c5716bc5..b908ed1135 100644 --- a/be/src/olap/storage_migration_v2.cpp +++ b/be/src/olap/storage_migration_v2.cpp @@ -55,10 +55,10 @@ namespace doris { DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(storage_migration_mem_consumption, MetricUnit::BYTES, "", mem_consumption, Labels({{"type", "storage_migration"}})); - StorageMigrationV2Handler::StorageMigrationV2Handler() : _mem_tracker(MemTracker::create_tracker( - -1, "StorageMigrationV2Handler", StorageEngine::instance()->storage_migration_mem_tracker())) { + -1, "StorageMigrationV2Handler", + StorageEngine::instance()->storage_migration_mem_tracker())) { REGISTER_HOOK_METRIC(storage_migration_mem_consumption, [this]() { return _mem_tracker->consumption(); }); } @@ -67,7 +67,8 @@ StorageMigrationV2Handler::~StorageMigrationV2Handler() { DEREGISTER_HOOK_METRIC(storage_migration_mem_consumption); } -Status StorageMigrationV2Handler::process_storage_migration_v2(const TStorageMigrationReqV2& request) { +Status StorageMigrationV2Handler::process_storage_migration_v2( + const TStorageMigrationReqV2& request) { LOG(INFO) << "begin to do request storage_migration: base_tablet_id=" << request.base_tablet_id << ", new_tablet_id=" << request.new_tablet_id << ", migration_version=" << request.migration_version; @@ -79,7 +80,8 @@ Status StorageMigrationV2Handler::process_storage_migration_v2(const TStorageMig return Status::OLAPInternalError(OLAP_ERR_TABLE_NOT_FOUND); } // Lock schema_change_lock util schema change info is stored in tablet header - std::unique_lock schema_change_lock(base_tablet->get_schema_change_lock(), std::try_to_lock); + std::unique_lock schema_change_lock(base_tablet->get_schema_change_lock(), + std::try_to_lock); if (!schema_change_lock.owns_lock()) { LOG(WARNING) << "failed to obtain schema change lock. " << "base_tablet=" << request.base_tablet_id; @@ -91,7 +93,8 @@ Status StorageMigrationV2Handler::process_storage_migration_v2(const TStorageMig return res; } -Status StorageMigrationV2Handler::_do_process_storage_migration_v2(const TStorageMigrationReqV2& request) { +Status StorageMigrationV2Handler::_do_process_storage_migration_v2( + const TStorageMigrationReqV2& request) { Status res = Status::OK(); TabletSharedPtr base_tablet = StorageEngine::instance()->tablet_manager()->get_tablet( request.base_tablet_id, request.base_schema_hash); @@ -121,7 +124,8 @@ Status StorageMigrationV2Handler::_do_process_storage_migration_v2(const TStorag return res; } - LOG(INFO) << "finish to validate storage_migration request. begin to migrate data from base tablet " + LOG(INFO) << "finish to validate storage_migration request. begin to migrate data from base " + "tablet " "to new tablet" << " base_tablet=" << base_tablet->full_name() << " new_tablet=" << new_tablet->full_name(); @@ -217,11 +221,11 @@ Status StorageMigrationV2Handler::_do_process_storage_migration_v2(const TStorag } } - res = delete_handler.init(base_tablet->tablet_schema(), base_tablet->delete_predicates(), - end_version); + res = delete_handler.init(base_tablet->tablet_schema(), + base_tablet->delete_predicates(), end_version); if (!res.ok()) { - LOG(WARNING) << "init delete handler failed. base_tablet=" << base_tablet->full_name() - << ", end_version=" << end_version; + LOG(WARNING) << "init delete handler failed. base_tablet=" + << base_tablet->full_name() << ", end_version=" << end_version; // release delete handlers which have been inited successfully. delete_handler.finalize(); @@ -305,7 +309,8 @@ Status StorageMigrationV2Handler::_get_versions_to_be_changed( return Status::OK(); } -Status StorageMigrationV2Handler::_convert_historical_rowsets(const StorageMigrationParams& sm_params) { +Status StorageMigrationV2Handler::_convert_historical_rowsets( + const StorageMigrationParams& sm_params) { LOG(INFO) << "begin to convert historical rowsets for new_tablet from base_tablet." << " base_tablet=" << sm_params.base_tablet->full_name() << ", new_tablet=" << sm_params.new_tablet->full_name(); @@ -350,12 +355,12 @@ Status StorageMigrationV2Handler::_convert_historical_rowsets(const StorageMigra } if ((res = _generate_rowset_writer(sm_params.base_tablet->tablet_path_desc(), - sm_params.new_tablet->tablet_path_desc(), - rs_reader, rowset_writer.get(), new_tablet)) != OLAP_SUCCESS) { + sm_params.new_tablet->tablet_path_desc(), rs_reader, + rowset_writer.get(), new_tablet)) != OLAP_SUCCESS) { LOG(WARNING) << "failed to add_rowset. version=" << rs_reader->version().first << "-" << rs_reader->version().second; - new_tablet->data_dir()->remove_pending_ids( - ROWSET_ID_PREFIX + rowset_writer->rowset_id().to_string()); + new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + + rowset_writer->rowset_id().to_string()); goto PROCESS_ALTER_EXIT; } new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX + @@ -392,8 +397,8 @@ Status StorageMigrationV2Handler::_convert_historical_rowsets(const StorageMigra << " version=" << rs_reader->version().first << "-" << rs_reader->version().second; } - // XXX:The SchemaChange state should not be canceled at this time, because the new Delta has to be converted to the old and new Schema version - PROCESS_ALTER_EXIT : { +// XXX:The SchemaChange state should not be canceled at this time, because the new Delta has to be converted to the old and new Schema version +PROCESS_ALTER_EXIT : { // save tablet meta here because rowset meta is not saved during add rowset std::lock_guard new_wlock(sm_params.new_tablet->get_header_lock()); sm_params.new_tablet->save_meta(); @@ -409,8 +414,8 @@ Status StorageMigrationV2Handler::_convert_historical_rowsets(const StorageMigra return res; } -Status StorageMigrationV2Handler::_validate_migration_result(TabletSharedPtr new_tablet, - const TStorageMigrationReqV2& request) { +Status StorageMigrationV2Handler::_validate_migration_result( + TabletSharedPtr new_tablet, const TStorageMigrationReqV2& request) { Version max_continuous_version = {-1, 0}; new_tablet->max_continuous_version_from_beginning(&max_continuous_version); LOG(INFO) << "find max continuous version of tablet=" << new_tablet->full_name() @@ -434,13 +439,15 @@ Status StorageMigrationV2Handler::_validate_migration_result(TabletSharedPtr new return Status::OK(); } -Status StorageMigrationV2Handler::_generate_rowset_writer( - const FilePathDesc& src_desc, const FilePathDesc& dst_desc, - RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer, TabletSharedPtr new_tablet) { +Status StorageMigrationV2Handler::_generate_rowset_writer(const FilePathDesc& src_desc, + const FilePathDesc& dst_desc, + RowsetReaderSharedPtr rowset_reader, + RowsetWriter* new_rowset_writer, + TabletSharedPtr new_tablet) { if (!src_desc.is_remote() && dst_desc.is_remote()) { string remote_file_param_path = dst_desc.filepath + REMOTE_FILE_PARAM; rapidjson::StringBuffer strbuf; - rapidjson::PrettyWriter writer(strbuf); + rapidjson::PrettyWriter writer(strbuf); writer.StartObject(); writer.Key(TABLET_UID.c_str()); writer.String(TabletUid(new_tablet->tablet_uid()).to_string().c_str()); @@ -451,8 +458,8 @@ Status StorageMigrationV2Handler::_generate_rowset_writer( Env::Default(), Slice(std::string(strbuf.GetString())), remote_file_param_path); // strbuf.GetString() format: {"tablet_uid": "a84cfb67d3ad3d62-87fd8b3ae9bdad84", "storage_name": "s3_name"} if (!st.ok()) { - LOG(WARNING) << "fail to write tablet_uid and storage_name. path=" << remote_file_param_path - << ", error:" << st.to_string(); + LOG(WARNING) << "fail to write tablet_uid and storage_name. path=" + << remote_file_param_path << ", error:" << st.to_string(); return Status::OLAPInternalError(OLAP_ERR_COPY_FILE_ERROR); } LOG(INFO) << "write storage_param successfully: " << remote_file_param_path; diff --git a/be/src/olap/storage_migration_v2.h b/be/src/olap/storage_migration_v2.h index 47ca08d7e3..ae96c225ff 100644 --- a/be/src/olap/storage_migration_v2.h +++ b/be/src/olap/storage_migration_v2.h @@ -44,7 +44,6 @@ public: Status process_storage_migration_v2(const TStorageMigrationReqV2& request); private: - Status _get_versions_to_be_changed(TabletSharedPtr base_tablet, std::vector* versions_to_be_changed); @@ -57,13 +56,14 @@ private: Status _do_process_storage_migration_v2(const TStorageMigrationReqV2& request); - Status _validate_migration_result(TabletSharedPtr new_tablet, const TStorageMigrationReqV2& request); + Status _validate_migration_result(TabletSharedPtr new_tablet, + const TStorageMigrationReqV2& request); Status _convert_historical_rowsets(const StorageMigrationParams& sm_params); - Status _generate_rowset_writer( - const FilePathDesc& src_desc, const FilePathDesc& dst_desc, - RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer, TabletSharedPtr new_tablet); + Status _generate_rowset_writer(const FilePathDesc& src_desc, const FilePathDesc& dst_desc, + RowsetReaderSharedPtr rowset_reader, + RowsetWriter* new_rowset_writer, TabletSharedPtr new_tablet); private: StorageMigrationV2Handler(); diff --git a/be/src/olap/stream_index_common.h b/be/src/olap/stream_index_common.h index 25f501f140..6f686c409e 100644 --- a/be/src/olap/stream_index_common.h +++ b/be/src/olap/stream_index_common.h @@ -92,4 +92,3 @@ protected: }; } // namespace doris - diff --git a/be/src/olap/stream_index_reader.cpp b/be/src/olap/stream_index_reader.cpp index dd8ecaae2a..c90cc9dce2 100644 --- a/be/src/olap/stream_index_reader.cpp +++ b/be/src/olap/stream_index_reader.cpp @@ -22,8 +22,7 @@ namespace doris { PositionEntryReader::PositionEntryReader() : _positions(nullptr), _positions_count(0), _statistics_offset(0) {} -Status PositionEntryReader::init(StreamIndexHeader* header, FieldType type, - bool null_supported) { +Status PositionEntryReader::init(StreamIndexHeader* header, FieldType type, bool null_supported) { if (nullptr == header) { return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED); } @@ -84,7 +83,7 @@ StreamIndexReader::~StreamIndexReader() { } Status StreamIndexReader::init(char* buffer, size_t buffer_size, FieldType type, - bool is_using_cache, bool null_supported) { + bool is_using_cache, bool null_supported) { if (nullptr == buffer) { OLAP_LOG_WARNING("buffer given is invalid."); return Status::OLAPInternalError(OLAP_ERR_INPUT_PARAMETER_ERROR); diff --git a/be/src/olap/stream_index_reader.h b/be/src/olap/stream_index_reader.h index 92bbbebc02..4162ada5ae 100644 --- a/be/src/olap/stream_index_reader.h +++ b/be/src/olap/stream_index_reader.h @@ -70,7 +70,7 @@ public: ~StreamIndexReader(); Status init(char* buffer, size_t buffer_size, FieldType type, bool is_using_cache, - bool null_supported); + bool null_supported); const PositionEntryReader& entry(uint64_t entry_id); size_t entry_count(); @@ -89,4 +89,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/stream_index_writer.h b/be/src/olap/stream_index_writer.h index 6e60a7d918..fc41adfbf4 100644 --- a/be/src/olap/stream_index_writer.h +++ b/be/src/olap/stream_index_writer.h @@ -76,4 +76,3 @@ protected: }; } // namespace doris - diff --git a/be/src/olap/stream_name.h b/be/src/olap/stream_name.h index 304986548d..bbef562089 100644 --- a/be/src/olap/stream_name.h +++ b/be/src/olap/stream_name.h @@ -41,4 +41,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index e0d9068a86..71628cf4c3 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -57,12 +57,13 @@ DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_bytes, MetricUnit::BYTES); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_count, MetricUnit::OPERATIONS); TabletSharedPtr Tablet::create_tablet_from_meta(TabletMetaSharedPtr tablet_meta, - const StorageParamPB& storage_param, DataDir* data_dir) { + const StorageParamPB& storage_param, + DataDir* data_dir) { return std::make_shared(tablet_meta, storage_param, data_dir); } -Tablet::Tablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, DataDir* data_dir, - const std::string& cumulative_compaction_type) +Tablet::Tablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, + DataDir* data_dir, const std::string& cumulative_compaction_type) : BaseTablet(tablet_meta, storage_param, data_dir), _is_bad(false), _last_cumu_compaction_failure_millis(0), @@ -139,7 +140,7 @@ void Tablet::save_meta() { } Status Tablet::revise_tablet_meta(const std::vector& rowsets_to_clone, - const std::vector& versions_to_delete) { + const std::vector& versions_to_delete) { LOG(INFO) << "begin to revise tablet. tablet=" << full_name() << ", rowsets_to_clone=" << rowsets_to_clone.size() << ", versions_to_delete=" << versions_to_delete.size(); @@ -552,8 +553,7 @@ bool Tablet::_reconstruct_version_tracker_if_necessary() { } Status Tablet::capture_consistent_versions(const Version& spec_version, - std::vector* version_path, - bool quiet) const { + std::vector* version_path, bool quiet) const { Status status = _timestamped_version_tracker.capture_consistent_versions(spec_version, version_path); if (!status.ok() && !quiet) { @@ -602,15 +602,15 @@ void Tablet::acquire_version_and_rowsets( } Status Tablet::capture_consistent_rowsets(const Version& spec_version, - std::vector* rowsets) const { + std::vector* rowsets) const { std::vector version_path; RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); RETURN_NOT_OK(_capture_consistent_rowsets_unlocked(version_path, rowsets)); return Status::OK(); } -Status Tablet::_capture_consistent_rowsets_unlocked( - const std::vector& version_path, std::vector* rowsets) const { +Status Tablet::_capture_consistent_rowsets_unlocked(const std::vector& version_path, + std::vector* rowsets) const { DCHECK(rowsets != nullptr && rowsets->empty()); rowsets->reserve(version_path.size()); for (auto& version : version_path) { @@ -641,7 +641,7 @@ Status Tablet::_capture_consistent_rowsets_unlocked( } Status Tablet::capture_rs_readers(const Version& spec_version, - std::vector* rs_readers) const { + std::vector* rs_readers) const { std::vector version_path; RETURN_NOT_OK(capture_consistent_versions(spec_version, &version_path)); RETURN_NOT_OK(capture_rs_readers(version_path, rs_readers)); @@ -649,7 +649,7 @@ Status Tablet::capture_rs_readers(const Version& spec_version, } Status Tablet::capture_rs_readers(const std::vector& version_path, - std::vector* rs_readers) const { + std::vector* rs_readers) const { DCHECK(rs_readers != nullptr && rs_readers->empty()); for (auto version : version_path) { auto it = _rs_version_map.find(version); @@ -853,7 +853,7 @@ void Tablet::calculate_cumulative_point() { } Status Tablet::split_range(const OlapTuple& start_key_strings, const OlapTuple& end_key_strings, - uint64_t request_block_row_count, std::vector* ranges) { + uint64_t request_block_row_count, std::vector* ranges) { DCHECK(ranges != nullptr); size_t key_num = 0; @@ -1280,7 +1280,8 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info) { tablet_info->version_hash = 0; tablet_info->__set_partition_id(_tablet_meta->partition_id()); if (FilePathDesc::is_remote(_data_dir->storage_medium())) { - tablet_info->__set_storage_medium(fs::fs_util::get_t_storage_medium(_tablet_meta->storage_medium())); + tablet_info->__set_storage_medium( + fs::fs_util::get_t_storage_medium(_tablet_meta->storage_medium())); } else { tablet_info->__set_storage_medium(_data_dir->storage_medium()); } @@ -1341,7 +1342,8 @@ Status Tablet::prepare_compaction_and_calculate_permits(CompactionType compactio *permits = 0; if (res != Status::OLAPInternalError(OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION)) { DorisMetrics::instance()->cumulative_compaction_request_failed->increment(1); - return Status::InternalError(fmt::format("prepare cumulative compaction with err: {}", res)); + return Status::InternalError( + fmt::format("prepare cumulative compaction with err: {}", res)); } // return OK if OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION, so that we don't need to // print too much useless logs. @@ -1370,7 +1372,8 @@ Status Tablet::prepare_compaction_and_calculate_permits(CompactionType compactio *permits = 0; if (res != Status::OLAPInternalError(OLAP_ERR_BE_NO_SUITABLE_VERSION)) { DorisMetrics::instance()->base_compaction_request_failed->increment(1); - return Status::InternalError(fmt::format("prepare base compaction with err: {}", res)); + return Status::InternalError( + fmt::format("prepare base compaction with err: {}", res)); } // return OK if OLAP_ERR_BE_NO_SUITABLE_VERSION, so that we don't need to // print too much useless logs. diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index e043204af5..cf413a5064 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -53,7 +53,8 @@ using TabletSharedPtr = std::shared_ptr; class Tablet : public BaseTablet { public: static TabletSharedPtr create_tablet_from_meta(TabletMetaSharedPtr tablet_meta, - const StorageParamPB& storage_param, DataDir* data_dir = nullptr); + const StorageParamPB& storage_param, + DataDir* data_dir = nullptr); Tablet(TabletMetaSharedPtr tablet_meta, const StorageParamPB& storage_param, DataDir* data_dir, const std::string& cumulative_compaction_type = ""); @@ -69,7 +70,7 @@ public: void save_meta(); // Used in clone task, to update local meta when finishing a clone job Status revise_tablet_meta(const std::vector& rowsets_to_clone, - const std::vector& versions_to_delete); + const std::vector& versions_to_delete); const int64_t cumulative_layer_point() const; void set_cumulative_layer_point(int64_t new_point); @@ -118,8 +119,8 @@ public: // Given spec_version, find a continuous version path and store it in version_path. // If quiet is true, then only "does this path exist" is returned. Status capture_consistent_versions(const Version& spec_version, - std::vector* version_path, - bool quiet = false) const; + std::vector* version_path, + bool quiet = false) const; // if quiet is true, no error log will be printed if there are missing versions Status check_version_integrity(const Version& version, bool quiet = false); bool check_version_exist(const Version& version) const; @@ -127,12 +128,12 @@ public: std::vector>* version_rowsets) const; Status capture_consistent_rowsets(const Version& spec_version, - std::vector* rowsets) const; + std::vector* rowsets) const; Status capture_rs_readers(const Version& spec_version, - std::vector* rs_readers) const; + std::vector* rs_readers) const; Status capture_rs_readers(const std::vector& version_path, - std::vector* rs_readers) const; + std::vector* rs_readers) const; DelPredicateArray delete_predicates() { return _tablet_meta->delete_predicates(); } void add_delete_predicate(const DeletePredicatePB& delete_predicate, int64_t version); @@ -167,7 +168,7 @@ public: // operation for query Status split_range(const OlapTuple& start_key_strings, const OlapTuple& end_key_strings, - uint64_t request_block_row_count, std::vector* ranges); + uint64_t request_block_row_count, std::vector* ranges); void set_bad(bool is_bad) { _is_bad = is_bad; } @@ -271,7 +272,7 @@ private: /// but also delete the version in rowset meta vector. void _delete_stale_rowset_by_version(const Version& version); Status _capture_consistent_rowsets_unlocked(const std::vector& version_path, - std::vector* rowsets) const; + std::vector* rowsets) const; const uint32_t _calc_cumulative_compaction_score( std::shared_ptr cumulative_compaction_policy); @@ -453,4 +454,3 @@ inline size_t Tablet::row_size() const { } } // namespace doris - diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 4664cb7d35..83da1df3db 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -226,12 +226,18 @@ Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector path_desc.storage_medium = request.storage_medium; path_desc.storage_name = request.storage_param.storage_name; StorageParamPB storage_param; - Status st = StorageBackendMgr::instance()->get_storage_param(request.storage_param.storage_name, &storage_param); - if (!st.ok() || storage_param.DebugString() != fs::fs_util::get_storage_param_pb(request.storage_param).DebugString()) { - LOG(INFO) << "remote storage need to change, create it. storage_name: " << request.storage_param.storage_name; - RETURN_NOT_OK_STATUS_WITH_WARN(StorageBackendMgr::instance()->create_remote_storage( - fs::fs_util::get_storage_param_pb(request.storage_param)), - "remote storage create failed. storage_name: " + request.storage_param.storage_name); + Status st = StorageBackendMgr::instance()->get_storage_param( + request.storage_param.storage_name, &storage_param); + if (!st.ok() || + storage_param.DebugString() != + fs::fs_util::get_storage_param_pb(request.storage_param).DebugString()) { + LOG(INFO) << "remote storage need to change, create it. storage_name: " + << request.storage_param.storage_name; + RETURN_NOT_OK_STATUS_WITH_WARN( + StorageBackendMgr::instance()->create_remote_storage( + fs::fs_util::get_storage_param_pb(request.storage_param)), + "remote storage create failed. storage_name: " + + request.storage_param.storage_name); } } @@ -265,8 +271,9 @@ Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector // If we are doing schema-change, we should use the same data dir // TODO(lingbin): A litter trick here, the directory should be determined before // entering this method - if (request.storage_medium == base_tablet->data_dir()->path_desc().storage_medium - || (FilePathDesc::is_remote(request.storage_medium) && base_tablet->data_dir()->is_remote())) { + if (request.storage_medium == base_tablet->data_dir()->path_desc().storage_medium || + (FilePathDesc::is_remote(request.storage_medium) && + base_tablet->data_dir()->is_remote())) { stores.clear(); stores.push_back(base_tablet->data_dir()); } @@ -1040,7 +1047,8 @@ void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t if (data_dir->is_remote() && FileUtils::check_exist(remote_file_param_path)) { // it means you must remove remote file for this segment first string json_buf; - Status s = env_util::read_file_to_string(Env::Default(), remote_file_param_path, &json_buf); + Status s = env_util::read_file_to_string(Env::Default(), remote_file_param_path, + &json_buf); if (!s.ok()) { LOG(WARNING) << "delete unused file error when read remote_file_param_path: " << remote_file_param_path; @@ -1051,8 +1059,8 @@ void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t std::string tablet_uid = nullptr; rapidjson::Document dom; if (!dom.Parse(json_buf.c_str()).HasParseError()) { - if (dom.HasMember(TABLET_UID.c_str()) && dom[TABLET_UID.c_str()].IsString() - && dom.HasMember(STORAGE_NAME.c_str()) && dom[STORAGE_NAME.c_str()].IsString()) { + if (dom.HasMember(TABLET_UID.c_str()) && dom[TABLET_UID.c_str()].IsString() && + dom.HasMember(STORAGE_NAME.c_str()) && dom[STORAGE_NAME.c_str()].IsString()) { storage_name = dom[STORAGE_NAME.c_str()].GetString(); tablet_uid = dom[TABLET_UID.c_str()].GetString(); } @@ -1060,7 +1068,8 @@ void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t if (!tablet_uid.empty() && !storage_name.empty()) { segment_desc.storage_name = storage_name; StorageParamPB storage_param; - if (StorageBackendMgr::instance()->get_storage_param(storage_name, &storage_param) != OLAP_SUCCESS) { + if (StorageBackendMgr::instance()->get_storage_param( + storage_name, &storage_param) != OLAP_SUCCESS) { LOG(WARNING) << "storage_name is invalid: " << storage_name; return; } @@ -1068,12 +1077,18 @@ void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t // remote file may be exist, check and mv it to trash std::filesystem::path local_segment_path(schema_hash_path); std::stringstream remote_file_stream; - remote_file_stream << data_dir->path_desc().remote_path << DATA_PREFIX - << "/" << local_segment_path.parent_path().parent_path().filename().string() // shard - << "/" << local_segment_path.parent_path().filename().string() // tablet_path - << "/" << local_segment_path.filename().string() // segment_path - << "/" << tablet_uid; - segment_desc.storage_medium = fs::fs_util::get_t_storage_medium(storage_param.storage_medium()); + remote_file_stream + << data_dir->path_desc().remote_path << DATA_PREFIX << "/" + << local_segment_path.parent_path() + .parent_path() + .filename() + .string() // shard + << "/" + << local_segment_path.parent_path().filename().string() // tablet_path + << "/" << local_segment_path.filename().string() // segment_path + << "/" << tablet_uid; + segment_desc.storage_medium = + fs::fs_util::get_t_storage_medium(storage_param.storage_medium()); segment_desc.remote_path = remote_file_stream.str(); } } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index c7c4d8990d..a2420a5d81 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -228,4 +228,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 68f8ca9eb2..7e1a9b40cc 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -35,9 +35,9 @@ using std::vector; namespace doris { Status TabletMeta::create(const TCreateTabletReq& request, const TabletUid& tablet_uid, - uint64_t shard_id, uint32_t next_unique_id, - const unordered_map& col_ordinal_to_unique_id, - TabletMetaSharedPtr* tablet_meta) { + uint64_t shard_id, uint32_t next_unique_id, + const unordered_map& col_ordinal_to_unique_id, + TabletMetaSharedPtr* tablet_meta) { tablet_meta->reset(new TabletMeta( request.table_id, request.partition_id, request.tablet_id, request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id, @@ -454,7 +454,6 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { tablet_meta_pb->set_remote_storage_name(_remote_storage_name); tablet_meta_pb->set_storage_medium(_storage_medium); - } uint32_t TabletMeta::mem_size() const { diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index bc883c103b..c0b165a7bb 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -72,9 +72,9 @@ using TabletMetaSharedPtr = std::shared_ptr; class TabletMeta { public: static Status create(const TCreateTabletReq& request, const TabletUid& tablet_uid, - uint64_t shard_id, uint32_t next_unique_id, - const std::unordered_map& col_ordinal_to_unique_id, - TabletMetaSharedPtr* tablet_meta); + uint64_t shard_id, uint32_t next_unique_id, + const std::unordered_map& col_ordinal_to_unique_id, + TabletMetaSharedPtr* tablet_meta); TabletMeta(); // Only remote_storage_name is needed in meta, it is a key used to get remote params from fe. @@ -82,8 +82,8 @@ public: TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, int32_t schema_hash, uint64_t shard_id, const TTabletSchema& tablet_schema, uint32_t next_unique_id, const std::unordered_map& col_ordinal_to_unique_id, - TabletUid tablet_uid, TTabletType::type tabletType, TStorageMedium::type t_storage_medium, - const std::string& remote_storage_name); + TabletUid tablet_uid, TTabletType::type tabletType, + TStorageMedium::type t_storage_medium, const std::string& remote_storage_name); // If need add a filed in TableMeta, filed init copy in copy construct function TabletMeta(const TabletMeta& tablet_meta); TabletMeta(TabletMeta&& tablet_meta) = delete; @@ -111,13 +111,13 @@ public: int64_t table_id() const; int64_t partition_id() const; int64_t tablet_id() const; - int32_t schema_hash() const; - int16_t shard_id() const; - void set_shard_id(int32_t shard_id); - int64_t creation_time() const; - void set_creation_time(int64_t creation_time); - int64_t cumulative_layer_point() const; - void set_cumulative_layer_point(int64_t new_point); + int32_t schema_hash() const; + int16_t shard_id() const; + void set_shard_id(int32_t shard_id); + int64_t creation_time() const; + void set_creation_time(int64_t creation_time); + int64_t cumulative_layer_point() const; + void set_cumulative_layer_point(int64_t new_point); size_t num_rows() const; // disk space occupied by tablet @@ -171,13 +171,9 @@ public: bool all_beta() const; - std::string remote_storage_name() const { - return _remote_storage_name; - } + std::string remote_storage_name() const { return _remote_storage_name; } - StorageMediumPB storage_medium() const { - return _storage_medium; - } + StorageMediumPB storage_medium() const { return _storage_medium; } private: Status _save_meta(DataDir* data_dir); @@ -335,4 +331,3 @@ bool operator==(const TabletMeta& a, const TabletMeta& b); bool operator!=(const TabletMeta& a, const TabletMeta& b); } // namespace doris - diff --git a/be/src/olap/tablet_meta_manager.cpp b/be/src/olap/tablet_meta_manager.cpp index 4a2e612ff5..80b07e4475 100644 --- a/be/src/olap/tablet_meta_manager.cpp +++ b/be/src/olap/tablet_meta_manager.cpp @@ -49,7 +49,7 @@ namespace doris { // there are some rowset meta in local meta store and in in-memory tablet meta // but not in tablet meta in local meta store Status TabletMetaManager::get_meta(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - TabletMetaSharedPtr tablet_meta) { + TabletMetaSharedPtr tablet_meta) { OlapMeta* meta = store->get_meta(); std::stringstream key_stream; key_stream << HEADER_PREFIX << tablet_id << "_" << schema_hash; @@ -69,7 +69,7 @@ Status TabletMetaManager::get_meta(DataDir* store, TTabletId tablet_id, TSchemaH } Status TabletMetaManager::get_json_meta(DataDir* store, TTabletId tablet_id, - TSchemaHash schema_hash, std::string* json_meta) { + TSchemaHash schema_hash, std::string* json_meta) { TabletMetaSharedPtr tablet_meta(new TabletMeta()); Status s = get_meta(store, tablet_id, schema_hash, tablet_meta); if (!s.ok()) { @@ -85,20 +85,22 @@ Status TabletMetaManager::get_json_meta(DataDir* store, TTabletId tablet_id, // 1. if term > 0 then save to remote meta store first using term // 2. save to local meta store Status TabletMetaManager::save(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - TabletMetaSharedPtr tablet_meta, const string& header_prefix) { + TabletMetaSharedPtr tablet_meta, const string& header_prefix) { std::string key = fmt::format("{}{}_{}", header_prefix, tablet_id, schema_hash); std::string value; tablet_meta->serialize(&value); OlapMeta* meta = store->get_meta(); - VLOG_NOTICE << "save tablet meta" << ", key:" << key << ", meta length:" << value.length(); + VLOG_NOTICE << "save tablet meta" + << ", key:" << key << ", meta length:" << value.length(); return meta->put(META_COLUMN_FAMILY_INDEX, key, value); } Status TabletMetaManager::save(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - const std::string& meta_binary, const string& header_prefix) { + const std::string& meta_binary, const string& header_prefix) { std::string key = fmt::format("{}{}_{}", header_prefix, tablet_id, schema_hash); OlapMeta* meta = store->get_meta(); - VLOG_NOTICE << "save tablet meta " << ", key:" << key << " meta_size=" << meta_binary.length(); + VLOG_NOTICE << "save tablet meta " + << ", key:" << key << " meta_size=" << meta_binary.length(); return meta->put(META_COLUMN_FAMILY_INDEX, key, meta_binary); } @@ -106,7 +108,7 @@ Status TabletMetaManager::save(DataDir* store, TTabletId tablet_id, TSchemaHash // 1. remove load data first // 2. remove from load meta store using term if term > 0 Status TabletMetaManager::remove(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - const string& header_prefix) { + const string& header_prefix) { std::string key = fmt::format("{}{}_{}", header_prefix, tablet_id, schema_hash); OlapMeta* meta = store->get_meta(); Status res = meta->remove(META_COLUMN_FAMILY_INDEX, key); @@ -130,8 +132,7 @@ Status TabletMetaManager::traverse_headers( TSchemaHash schema_hash = std::stol(parts[2].c_str(), nullptr, 10); return func(tablet_id, schema_hash, value); }; - Status status = - meta->iterate(META_COLUMN_FAMILY_INDEX, header_prefix, traverse_header_func); + Status status = meta->iterate(META_COLUMN_FAMILY_INDEX, header_prefix, traverse_header_func); return status; } diff --git a/be/src/olap/tablet_meta_manager.h b/be/src/olap/tablet_meta_manager.h index be693c3040..2eba2bfd6c 100644 --- a/be/src/olap/tablet_meta_manager.h +++ b/be/src/olap/tablet_meta_manager.h @@ -33,27 +33,25 @@ const std::string HEADER_PREFIX = "tabletmeta_"; class TabletMetaManager { public: static Status get_meta(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - TabletMetaSharedPtr tablet_meta); + TabletMetaSharedPtr tablet_meta); static Status get_json_meta(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - std::string* json_meta); + std::string* json_meta); static Status save(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - TabletMetaSharedPtr tablet_meta, - const string& header_prefix = "tabletmeta_"); + TabletMetaSharedPtr tablet_meta, + const string& header_prefix = "tabletmeta_"); static Status save(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - const std::string& meta_binary, - const string& header_prefix = "tabletmeta_"); + const std::string& meta_binary, const string& header_prefix = "tabletmeta_"); static Status remove(DataDir* store, TTabletId tablet_id, TSchemaHash schema_hash, - const string& header_prefix = "tabletmeta_"); + const string& header_prefix = "tabletmeta_"); - static Status traverse_headers( - OlapMeta* meta, std::function const& func, - const string& header_prefix = "tabletmeta_"); + static Status traverse_headers(OlapMeta* meta, + std::function const& func, + const string& header_prefix = "tabletmeta_"); static Status load_json_meta(DataDir* store, const std::string& meta_path); }; } // namespace doris - diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index d93d7a21b6..8554bebb3b 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -181,4 +181,3 @@ bool operator==(const TabletSchema& a, const TabletSchema& b); bool operator!=(const TabletSchema& a, const TabletSchema& b); } // namespace doris - diff --git a/be/src/olap/task/engine_batch_load_task.cpp b/be/src/olap/task/engine_batch_load_task.cpp index 2aabb9852c..a2e4c148f0 100644 --- a/be/src/olap/task/engine_batch_load_task.cpp +++ b/be/src/olap/task/engine_batch_load_task.cpp @@ -82,7 +82,8 @@ Status EngineBatchLoadTask::execute() { } else if (_push_req.push_type == TPushType::DELETE) { Status delete_data_status = _delete_data(_push_req, _tablet_infos); if (delete_data_status != Status::OK()) { - LOG(WARNING) << "delete data failed. status:" << delete_data_status << " signature:" << _signature; + LOG(WARNING) << "delete data failed. status:" << delete_data_status + << " signature:" << _signature; status = delete_data_status; } } else { @@ -277,7 +278,7 @@ Status EngineBatchLoadTask::_process() { } Status EngineBatchLoadTask::_push(const TPushReq& request, - std::vector* tablet_info_vec) { + std::vector* tablet_info_vec) { Status res = Status::OK(); LOG(INFO) << "begin to process push. " << " transaction_id=" << request.transaction_id << " tablet_id=" << request.tablet_id @@ -339,7 +340,7 @@ Status EngineBatchLoadTask::_push(const TPushReq& request, } Status EngineBatchLoadTask::_delete_data(const TPushReq& request, - std::vector* tablet_info_vec) { + std::vector* tablet_info_vec) { VLOG_DEBUG << "begin to process delete data. request=" << ThriftDebugString(request); DorisMetrics::instance()->delete_requests_total->increment(1); @@ -370,7 +371,7 @@ Status EngineBatchLoadTask::_delete_data(const TPushReq& request, if (!res.ok()) { LOG(WARNING) << "fail to push empty version for delete data. " - << "res=" << res << "tablet=" << tablet->full_name(); + << "res=" << res << "tablet=" << tablet->full_name(); DorisMetrics::instance()->delete_requests_failed->increment(1); return res; } diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index ae6d5a82b6..112def6ea3 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -47,8 +47,7 @@ Status EngineChecksumTask::_compute_checksum() { return Status::OLAPInternalError(OLAP_ERR_CE_CMD_PARAMS_ERROR); } - TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id); + TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id); if (nullptr == tablet.get()) { OLAP_LOG_WARNING("can't find tablet. [tablet_id=%ld schema_hash=%d]", _tablet_id, _schema_hash); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 19e7c89042..beb226512c 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -246,7 +246,8 @@ void EngineCloneTask::_set_tablet_info(Status status, bool is_new_tablet) { << ", expected_version: " << _clone_req.committed_version; Status drop_status = StorageEngine::instance()->tablet_manager()->drop_tablet( _clone_req.tablet_id, _clone_req.schema_hash); - if (drop_status != Status::OK() && drop_status != Status::OLAPInternalError(OLAP_ERR_TABLE_NOT_FOUND)) { + if (drop_status != Status::OK() && + drop_status != Status::OLAPInternalError(OLAP_ERR_TABLE_NOT_FOUND)) { // just log LOG(WARNING) << "drop stale cloned table failed! tablet id: " << _clone_req.tablet_id; @@ -288,9 +289,9 @@ Status EngineCloneTask::_make_and_download_snapshots(DataDir& data_dir, // Make snapshot in remote olap engine *src_host = src; // make snapshot - auto st = _make_snapshot(src.host, src.be_port, _clone_req.tablet_id, - _clone_req.schema_hash, timeout_s, missed_versions, snapshot_path, - allow_incremental_clone); + auto st = + _make_snapshot(src.host, src.be_port, _clone_req.tablet_id, _clone_req.schema_hash, + timeout_s, missed_versions, snapshot_path, allow_incremental_clone); if (st.ok()) { LOG(INFO) << "success to make snapshot. ip=" << src.host << ", port=" << src.be_port << ", tablet=" << _clone_req.tablet_id @@ -522,7 +523,7 @@ Status EngineCloneTask::_download_files(DataDir* data_dir, const std::string& re /// 1. Linke all files from CLONE dir to tablet dir if file does not exist in tablet dir /// 2. Call _finish_xx_clone() to revise the tablet meta. Status EngineCloneTask::_finish_clone(Tablet* tablet, const string& clone_dir, - int64_t committed_version, bool is_incremental_clone) { + int64_t committed_version, bool is_incremental_clone) { Status res = Status::OK(); std::vector linked_success_files; // clone and compaction operation should be performed sequentially @@ -636,8 +637,8 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const string& clone_dir, /// 1. Get missing version from local tablet again and check if they exist in cloned tablet. /// 2. Revise the local tablet meta to add all incremental cloned rowset's meta. Status EngineCloneTask::_finish_incremental_clone(Tablet* tablet, - const TabletMeta& cloned_tablet_meta, - int64_t committed_version) { + const TabletMeta& cloned_tablet_meta, + int64_t committed_version) { LOG(INFO) << "begin to finish incremental clone. tablet=" << tablet->full_name() << ", clone version=" << committed_version; @@ -762,10 +763,9 @@ Status EngineCloneTask::_finish_full_clone(Tablet* tablet, TabletMeta* cloned_ta // but some rowset is useless, so that remove them here for (auto& rs_meta_ptr : rs_metas_found_in_src) { RowsetSharedPtr rowset_to_remove; - auto s = - RowsetFactory::create_rowset(&(cloned_tablet_meta->tablet_schema()), - tablet->tablet_path_desc().filepath, rs_meta_ptr, - &rowset_to_remove); + auto s = RowsetFactory::create_rowset(&(cloned_tablet_meta->tablet_schema()), + tablet->tablet_path_desc().filepath, rs_meta_ptr, + &rowset_to_remove); if (!s.ok()) { LOG(WARNING) << "failed to init rowset to remove: " << rs_meta_ptr->rowset_id().to_string(); diff --git a/be/src/olap/task/engine_clone_task.h b/be/src/olap/task/engine_clone_task.h index dbe0f30611..6d714750b6 100644 --- a/be/src/olap/task/engine_clone_task.h +++ b/be/src/olap/task/engine_clone_task.h @@ -44,16 +44,18 @@ private: Status _do_clone(); virtual Status _finish_clone(Tablet* tablet, const std::string& clone_dir, - int64_t committed_version, bool is_incremental_clone); + int64_t committed_version, bool is_incremental_clone); Status _finish_incremental_clone(Tablet* tablet, const TabletMeta& cloned_tablet_meta, - int64_t committed_version); + int64_t committed_version); Status _finish_full_clone(Tablet* tablet, TabletMeta* cloned_tablet_meta); - Status _make_and_download_snapshots(DataDir& data_dir, const string& local_data_path, TBackend* src_host, - string* src_file_path, vector* error_msgs, - const vector* missing_versions, bool* allow_incremental_clone); + Status _make_and_download_snapshots(DataDir& data_dir, const string& local_data_path, + TBackend* src_host, string* src_file_path, + vector* error_msgs, + const vector* missing_versions, + bool* allow_incremental_clone); void _set_tablet_info(Status status, bool is_new_tablet); diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index 12bde0ea79..4c147dcbd9 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -61,10 +61,9 @@ Status EnginePublishVersionTask::finish() { TabletInfo tablet_info = tablet_rs.first; RowsetSharedPtr rowset = tablet_rs.second; VLOG_CRITICAL << "begin to publish version on tablet. " - << "tablet_id=" << tablet_info.tablet_id - << ", schema_hash=" << tablet_info.schema_hash - << ", version=" << version.first - << ", transaction_id=" << transaction_id; + << "tablet_id=" << tablet_info.tablet_id + << ", schema_hash=" << tablet_info.schema_hash + << ", version=" << version.first << ", transaction_id=" << transaction_id; // if rowset is null, it means this be received write task, but failed during write // and receive fe's publish version task // this be must return as an error tablet @@ -127,7 +126,6 @@ Status EnginePublishVersionTask::finish() { // check if the version exist, if not exist, then set publish failed if (!tablet->check_version_exist(version)) { _error_tablet_ids->push_back(tablet_info.tablet_id); - } } } diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 3af4e189b7..68c629ebb2 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -38,9 +38,8 @@ Status EngineStorageMigrationTask::execute() { return _migrate(); } -Status EngineStorageMigrationTask::_get_versions( - int32_t start_version, int32_t* end_version, - std::vector* consistent_rowsets) { +Status EngineStorageMigrationTask::_get_versions(int32_t start_version, int32_t* end_version, + std::vector* consistent_rowsets) { std::shared_lock rdlock(_tablet->get_header_lock()); const RowsetSharedPtr last_version = _tablet->rowset_with_max_version(); if (last_version == nullptr) { @@ -166,8 +165,7 @@ Status EngineStorageMigrationTask::_reload_tablet(const std::string& full_path) // if old tablet finished schema change, then the schema change status of the new tablet is DONE // else the schema change status of the new tablet is FAILED - TabletSharedPtr new_tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); + TabletSharedPtr new_tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); if (new_tablet == nullptr) { LOG(WARNING) << "tablet not found. tablet_id=" << tablet_id; return Status::OLAPInternalError(OLAP_ERR_TABLE_NOT_FOUND); @@ -286,7 +284,9 @@ Status EngineStorageMigrationTask::_migrate() { // force to copy the remaining data and index res = _copy_index_and_data_files(full_path, temp_consistent_rowsets); if (!res.ok()) { - LOG(WARNING) << "fail to copy the remaining index and data files when migrate. res=" << res; + LOG(WARNING) + << "fail to copy the remaining index and data files when migrate. res=" + << res; break; } } else { diff --git a/be/src/olap/task/engine_storage_migration_task.h b/be/src/olap/task/engine_storage_migration_task.h index 2c163e7297..b529b78c2a 100644 --- a/be/src/olap/task/engine_storage_migration_task.h +++ b/be/src/olap/task/engine_storage_migration_task.h @@ -39,12 +39,11 @@ private: // check if task is timeout bool _is_timeout(); Status _get_versions(int32_t start_version, int32_t* end_version, - std::vector* consistent_rowsets); + std::vector* consistent_rowsets); Status _check_running_txns(); // caller should not hold migration lock, and 'migration_wlock' should not be nullptr // ownership of the migration lock is transferred to the caller if check succ - Status _check_running_txns_until_timeout( - std::unique_lock* migration_wlock); + Status _check_running_txns_until_timeout(std::unique_lock* migration_wlock); // if the size less than threshold, return true bool _is_rowsets_size_less_than_threshold( @@ -61,9 +60,8 @@ private: // TODO: hkp // rewrite this function - Status _copy_index_and_data_files( - const std::string& full_path, - const std::vector& consistent_rowsets) const; + Status _copy_index_and_data_files(const std::string& full_path, + const std::vector& consistent_rowsets) const; private: // tablet to do migrated diff --git a/be/src/olap/tuple_reader.cpp b/be/src/olap/tuple_reader.cpp index bfb05a2001..63e214576d 100644 --- a/be/src/olap/tuple_reader.cpp +++ b/be/src/olap/tuple_reader.cpp @@ -37,9 +37,9 @@ using std::set; using std::vector; namespace doris { - + Status TupleReader::_init_collect_iter(const ReaderParams& read_params, - std::vector* valid_rs_readers) { + std::vector* valid_rs_readers) { _collect_iter.init(this); std::vector rs_readers; auto res = _capture_rs_readers(read_params, &rs_readers); @@ -74,7 +74,9 @@ Status TupleReader::init(const ReaderParams& read_params) { std::vector rs_readers; auto status = _init_collect_iter(read_params, &rs_readers); - if (!status.ok()) { return status; } + if (!status.ok()) { + return status; + } if (_optimize_for_single_rowset(rs_readers)) { _next_row_func = _tablet->keys_type() == AGG_KEYS ? &TupleReader::_direct_agg_key_next_row @@ -101,7 +103,7 @@ Status TupleReader::init(const ReaderParams& read_params) { } Status TupleReader::_direct_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, - bool* eof) { + bool* eof) { if (UNLIKELY(_next_key == nullptr)) { *eof = true; return Status::OK(); @@ -115,7 +117,7 @@ Status TupleReader::_direct_next_row(RowCursor* row_cursor, MemPool* mem_pool, O } Status TupleReader::_direct_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof) { + ObjectPool* agg_pool, bool* eof) { if (UNLIKELY(_next_key == nullptr)) { *eof = true; return Status::OK(); @@ -131,8 +133,8 @@ Status TupleReader::_direct_agg_key_next_row(RowCursor* row_cursor, MemPool* mem return Status::OK(); } -Status TupleReader::_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, - bool* eof) { +Status TupleReader::_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, + ObjectPool* agg_pool, bool* eof) { if (UNLIKELY(_next_key == nullptr)) { *eof = true; return Status::OK(); @@ -171,7 +173,7 @@ Status TupleReader::_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, } Status TupleReader::_unique_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof) { + ObjectPool* agg_pool, bool* eof) { *eof = false; bool cur_delete_flag = false; do { diff --git a/be/src/olap/tuple_reader.h b/be/src/olap/tuple_reader.h index 2150d3f2f8..d28a93d766 100644 --- a/be/src/olap/tuple_reader.h +++ b/be/src/olap/tuple_reader.h @@ -51,8 +51,8 @@ public: // Initialize TupleReader with tablet, data version and fetch range. Status init(const ReaderParams& read_params) override; - Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof) override { + Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, + bool* eof) override { return (this->*_next_row_func)(row_cursor, mem_pool, agg_pool, eof); } @@ -63,29 +63,29 @@ private: // Direcly read row from rowset and pass to upper caller. No need to do aggregation. // This is usually used for DUPLICATE KEY tables Status _direct_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, - bool* eof); + bool* eof); // Just same as _direct_next_row, but this is only for AGGREGATE KEY tables. // And this is an optimization for AGGR tables. // When there is only one rowset and is not overlapping, we can read it directly without aggregation. - Status _direct_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof); + Status _direct_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, + bool* eof); // For normal AGGREGATE KEY tables, read data by a merge heap. Status _agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, - bool* eof); + bool* eof); // For UNIQUE KEY tables, read data by a merge heap. // The difference from _agg_key_next_row is that it will read the data from high version to low version, // to minimize the comparison time in merge heap. Status _unique_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool, - bool* eof); + bool* eof); - Status _init_collect_iter(const ReaderParams& read_params, std::vector* valid_rs_readers ); + Status _init_collect_iter(const ReaderParams& read_params, + std::vector* valid_rs_readers); private: const RowCursor* _next_key = nullptr; Status (TupleReader::*_next_row_func)(RowCursor* row_cursor, MemPool* mem_pool, - ObjectPool* agg_pool, bool* eof) = nullptr; + ObjectPool* agg_pool, bool* eof) = nullptr; }; } // namespace doris - diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index fbf1a121fb..bad96a794f 100644 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -83,34 +83,34 @@ TxnManager::TxnManager(int32_t txn_map_shard_size, int32_t txn_shard_size) } Status TxnManager::prepare_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id, const PUniqueId& load_id) { + TTransactionId transaction_id, const PUniqueId& load_id) { return prepare_txn(partition_id, transaction_id, tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid(), load_id); } Status TxnManager::commit_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id, const PUniqueId& load_id, - const RowsetSharedPtr& rowset_ptr, bool is_recovery) { + TTransactionId transaction_id, const PUniqueId& load_id, + const RowsetSharedPtr& rowset_ptr, bool is_recovery) { return commit_txn(tablet->data_dir()->get_meta(), partition_id, transaction_id, tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid(), load_id, rowset_ptr, is_recovery); } Status TxnManager::publish_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id, const Version& version) { + TTransactionId transaction_id, const Version& version) { return publish_txn(tablet->data_dir()->get_meta(), partition_id, transaction_id, tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid(), version); } // delete the txn from manager if it is not committed(not have a valid rowset) Status TxnManager::rollback_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id) { + TTransactionId transaction_id) { return rollback_txn(partition_id, transaction_id, tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid()); } Status TxnManager::delete_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id) { + TTransactionId transaction_id) { return delete_txn(tablet->data_dir()->get_meta(), partition_id, transaction_id, tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid()); } @@ -119,8 +119,8 @@ Status TxnManager::delete_txn(TPartitionId partition_id, const TabletSharedPtr& // could not distinguish rollup, schema change or base table, prepare txn successfully will allow // ingest retried Status TxnManager::prepare_txn(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, - TabletUid tablet_uid, const PUniqueId& load_id) { + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id) { TxnKey key(partition_id, transaction_id); TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); std::lock_guard txn_wrlock(_get_txn_map_lock(transaction_id)); @@ -166,10 +166,10 @@ Status TxnManager::prepare_txn(TPartitionId partition_id, TTransactionId transac } Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id, - TTransactionId transaction_id, TTabletId tablet_id, - SchemaHash schema_hash, TabletUid tablet_uid, - const PUniqueId& load_id, const RowsetSharedPtr& rowset_ptr, - bool is_recovery) { + TTransactionId transaction_id, TTabletId tablet_id, + SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id, const RowsetSharedPtr& rowset_ptr, + bool is_recovery) { if (partition_id < 1 || transaction_id < 1 || tablet_id < 1) { LOG(FATAL) << "invalid commit req " << " partition_id=" << partition_id << " transaction_id=" << transaction_id @@ -228,9 +228,8 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id, // save meta need access disk, it maybe very slow, so that it is not in global txn lock // it is under a single txn lock if (!is_recovery) { - Status save_status = - RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), - rowset_ptr->rowset_meta()->get_rowset_pb()); + Status save_status = RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), + rowset_ptr->rowset_meta()->get_rowset_pb()); if (save_status != Status::OK()) { LOG(WARNING) << "save committed rowset failed. when commit txn rowset_id:" << rowset_ptr->rowset_id() << "tablet id: " << tablet_id @@ -256,9 +255,9 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id, // remove a txn from txn manager Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, - TTransactionId transaction_id, TTabletId tablet_id, - SchemaHash schema_hash, TabletUid tablet_uid, - const Version& version) { + TTransactionId transaction_id, TTabletId tablet_id, + SchemaHash schema_hash, TabletUid tablet_uid, + const Version& version) { pair key(partition_id, transaction_id); TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); RowsetSharedPtr rowset_ptr = nullptr; @@ -283,9 +282,8 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, // TODO(ygl): rowset is already set version here, memory is changed, if save failed // it maybe a fatal error rowset_ptr->make_visible(version); - Status save_status = - RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), - rowset_ptr->rowset_meta()->get_rowset_pb()); + Status save_status = RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), + rowset_ptr->rowset_meta()->get_rowset_pb()); if (save_status != Status::OK()) { LOG(WARNING) << "save committed rowset failed. when publish txn rowset_id:" << rowset_ptr->rowset_id() << ", tablet id: " << tablet_id @@ -320,8 +318,7 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, // may be committed in another thread and our current thread meets errors when writing to data file // BE has to wait for fe call clear txn api Status TxnManager::rollback_txn(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, - TabletUid tablet_uid) { + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid) { pair key(partition_id, transaction_id); TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); std::lock_guard wrlock(_get_txn_map_lock(transaction_id)); @@ -354,8 +351,8 @@ Status TxnManager::rollback_txn(TPartitionId partition_id, TTransactionId transa // fe call this api to clear unused rowsets in be // could not delete the rowset if it already has a valid version Status TxnManager::delete_txn(OlapMeta* meta, TPartitionId partition_id, - TTransactionId transaction_id, TTabletId tablet_id, - SchemaHash schema_hash, TabletUid tablet_uid) { + TTransactionId transaction_id, TTabletId tablet_id, + SchemaHash schema_hash, TabletUid tablet_uid) { pair key(partition_id, transaction_id); TabletInfo tablet_info(tablet_id, schema_hash, tablet_uid); std::lock_guard txn_wrlock(_get_txn_map_lock(transaction_id)); diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h index 81f78ff93a..2dd6dfb753 100644 --- a/be/src/olap/txn_manager.h +++ b/be/src/olap/txn_manager.h @@ -71,48 +71,48 @@ public: } Status prepare_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id, const PUniqueId& load_id); + TTransactionId transaction_id, const PUniqueId& load_id); Status commit_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id, const PUniqueId& load_id, - const RowsetSharedPtr& rowset_ptr, bool is_recovery); + TTransactionId transaction_id, const PUniqueId& load_id, + const RowsetSharedPtr& rowset_ptr, bool is_recovery); Status publish_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id, const Version& version); + TTransactionId transaction_id, const Version& version); // delete the txn from manager if it is not committed(not have a valid rowset) Status rollback_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id); + TTransactionId transaction_id); Status delete_txn(TPartitionId partition_id, const TabletSharedPtr& tablet, - TTransactionId transaction_id); + TTransactionId transaction_id); // add a txn to manager // partition id is useful in publish version stage because version is associated with partition Status prepare_txn(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, - const PUniqueId& load_id); + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id); Status commit_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, - const PUniqueId& load_id, const RowsetSharedPtr& rowset_ptr, - bool is_recovery); + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const PUniqueId& load_id, const RowsetSharedPtr& rowset_ptr, + bool is_recovery); // remove a txn from txn manager // not persist rowset meta because Status publish_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, - const Version& version); + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, + const Version& version); // delete the txn from manager if it is not committed(not have a valid rowset) Status rollback_txn(TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); // remove the txn from txn manager // delete the related rowset if it is not null // delete rowset related data if it is not null Status delete_txn(OlapMeta* meta, TPartitionId partition_id, TTransactionId transaction_id, - TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); + TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid); void get_tablet_related_txns(TTabletId tablet_id, SchemaHash schema_hash, TabletUid tablet_uid, int64_t* partition_id, std::set* transaction_ids); @@ -209,4 +209,3 @@ inline std::mutex& TxnManager::_get_txn_lock(TTransactionId transactionId) { } } // namespace doris - diff --git a/be/src/olap/types.h b/be/src/olap/types.h index fdee6db0b4..2d486ed4be 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -1287,4 +1287,3 @@ inline const TypeInfo* get_collection_type_info() { } } // namespace doris - diff --git a/be/src/olap/utils.cpp b/be/src/olap/utils.cpp index 847a4274cc..ef8a650cdd 100644 --- a/be/src/olap/utils.cpp +++ b/be/src/olap/utils.cpp @@ -59,7 +59,7 @@ using std::vector; namespace doris { Status olap_compress(const char* src_buf, size_t src_len, char* dest_buf, size_t dest_len, - size_t* written_len, OLAPCompressionType compression_type) { + size_t* written_len, OLAPCompressionType compression_type) { if (nullptr == src_buf || nullptr == dest_buf || nullptr == written_len) { OLAP_LOG_WARNING( "input param with nullptr pointer. [src_buf=%p dest_buf=%p written_len=%p]", @@ -78,10 +78,8 @@ Status olap_compress(const char* src_buf, size_t src_len, char* dest_buf, size_t if (LZO_E_OK != (lzo_res = lzo1x_1_compress( reinterpret_cast(src_buf), src_len, reinterpret_cast(dest_buf), written_len, mem))) { - LOG(WARNING) << "compress failed. src_len=" << src_len - << "; dest_len= " << dest_len - << "; written_len=" << *written_len - << "; lzo_res=" << lzo_res; + LOG(WARNING) << "compress failed. src_len=" << src_len << "; dest_len= " << dest_len + << "; written_len=" << *written_len << "; lzo_res=" << lzo_res; return Status::OLAPInternalError(OLAP_ERR_COMPRESS_ERROR); } else if (*written_len > dest_len) { @@ -99,10 +97,8 @@ Status olap_compress(const char* src_buf, size_t src_len, char* dest_buf, size_t if (LZO_E_OK != (lzo_res = lzo1c_99_compress( reinterpret_cast(src_buf), src_len, reinterpret_cast(dest_buf), written_len, mem))) { - LOG(WARNING) << "compress failed. src_len=" << src_len - << "; dest_len= " << dest_len - << "; written_len=" << *written_len - << "; lzo_res=" << lzo_res; + LOG(WARNING) << "compress failed. src_len=" << src_len << "; dest_len= " << dest_len + << "; written_len=" << *written_len << "; lzo_res=" << lzo_res; return Status::OLAPInternalError(OLAP_ERR_COMPRESS_ERROR); } else if (*written_len > dest_len) { @@ -134,7 +130,7 @@ Status olap_compress(const char* src_buf, size_t src_len, char* dest_buf, size_t } Status olap_decompress(const char* src_buf, size_t src_len, char* dest_buf, size_t dest_len, - size_t* written_len, OLAPCompressionType compression_type) { + size_t* written_len, OLAPCompressionType compression_type) { if (nullptr == src_buf || nullptr == dest_buf || nullptr == written_len) { OLAP_LOG_WARNING( "input param with nullptr pointer. [src_buf=%p dest_buf=%p written_len=%p]", @@ -151,10 +147,8 @@ Status olap_decompress(const char* src_buf, size_t src_len, char* dest_buf, size reinterpret_cast(dest_buf), written_len, nullptr); if (LZO_E_OK != lzo_res) { - LOG(WARNING) << "decompress failed. src_len=" << src_len - << "; dest_len= " << dest_len - << "; written_len=" << *written_len - << "; lzo_res=" << lzo_res; + LOG(WARNING) << "decompress failed. src_len=" << src_len << "; dest_len= " << dest_len + << "; written_len=" << *written_len << "; lzo_res=" << lzo_res; return Status::OLAPInternalError(OLAP_ERR_DECOMPRESS_ERROR); } else if (*written_len > dest_len) { OLAP_LOG_WARNING("buffer overflow when decompressing. [dest_len=%lu written_len=%lu]", @@ -169,10 +163,8 @@ Status olap_decompress(const char* src_buf, size_t src_len, char* dest_buf, size reinterpret_cast(dest_buf), written_len, nullptr); if (LZO_E_OK != lzo_res) { - LOG(WARNING) << "compress failed. src_len=" << src_len - << "; dest_len= " << dest_len - << "; written_len=" << *written_len - << "; lzo_res=" << lzo_res; + LOG(WARNING) << "compress failed. src_len=" << src_len << "; dest_len= " << dest_len + << "; written_len=" << *written_len << "; lzo_res=" << lzo_res; return Status::OLAPInternalError(OLAP_ERR_DECOMPRESS_ERROR); } else if (*written_len > dest_len) { OLAP_LOG_WARNING("buffer overflow when decompressing. [dest_len=%lu written_len=%lu]", @@ -188,10 +180,8 @@ Status olap_decompress(const char* src_buf, size_t src_len, char* dest_buf, size int lz4_res = LZ4_decompress_safe(src_buf, dest_buf, src_len, dest_len); *written_len = lz4_res; if (lz4_res < 0) { - LOG(WARNING) << "decompress failed. src_len=" << src_len - << "; dest_len= " << dest_len - << "; written_len=" << *written_len - << "; lzo_res=" << lz4_res; + LOG(WARNING) << "decompress failed. src_len=" << src_len << "; dest_len= " << dest_len + << "; written_len=" << *written_len << "; lzo_res=" << lz4_res; return Status::OLAPInternalError(OLAP_ERR_BUFFER_OVERFLOW); } break; diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 2c612fbe02..9e3c18fabb 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -76,8 +76,7 @@ private: // @param separator 分隔符 // @param result 切分结果 template -Status split_string(const std::string& base, const T separator, - std::vector* result) { +Status split_string(const std::string& base, const T separator, std::vector* result) { if (!result) { return Status::OLAPInternalError(OLAP_ERR_OTHER_ERROR); } @@ -117,10 +116,10 @@ void _destruct_array(const void* array, void*) { // 根据压缩类型的不同,执行压缩。dest_buf_len是dest_buf的最大长度, // 通过指针返回的written_len是实际写入的长度。 Status olap_compress(const char* src_buf, size_t src_len, char* dest_buf, size_t dest_len, - size_t* written_len, OLAPCompressionType compression_type); + size_t* written_len, OLAPCompressionType compression_type); Status olap_decompress(const char* src_buf, size_t src_len, char* dest_buf, size_t dest_len, - size_t* written_len, OLAPCompressionType compression_type); + size_t* written_len, OLAPCompressionType compression_type); // 计算adler32的包装函数 // 第一次使用的时候第一个参数传宏ADLER32_INIT, 之后的调用传上次计算的结果 @@ -219,9 +218,13 @@ private: }; inline bool is_io_error(Status status) { - return (((Status::OLAPInternalError(OLAP_ERR_IO_ERROR) == status || Status::OLAPInternalError(OLAP_ERR_READ_UNENOUGH) == status) && errno == EIO) || - Status::OLAPInternalError(OLAP_ERR_CHECKSUM_ERROR) == status || Status::OLAPInternalError(OLAP_ERR_FILE_DATA_ERROR) == status || - Status::OLAPInternalError(OLAP_ERR_TEST_FILE_ERROR) == status || Status::OLAPInternalError(OLAP_ERR_ROWBLOCK_READ_INFO_ERROR) == status); + return (((Status::OLAPInternalError(OLAP_ERR_IO_ERROR) == status || + Status::OLAPInternalError(OLAP_ERR_READ_UNENOUGH) == status) && + errno == EIO) || + Status::OLAPInternalError(OLAP_ERR_CHECKSUM_ERROR) == status || + Status::OLAPInternalError(OLAP_ERR_FILE_DATA_ERROR) == status || + Status::OLAPInternalError(OLAP_ERR_TEST_FILE_ERROR) == status || + Status::OLAPInternalError(OLAP_ERR_ROWBLOCK_READ_INFO_ERROR) == status); } #define ENDSWITH(str, suffix) ((str).rfind(suffix) == (str).size() - strlen(suffix)) @@ -314,4 +317,3 @@ bool valid_bool(const std::string& value_str); } while (0) } // namespace doris - diff --git a/be/src/olap/version_graph.cpp b/be/src/olap/version_graph.cpp index 59b841a7bc..fb782f4b36 100644 --- a/be/src/olap/version_graph.cpp +++ b/be/src/olap/version_graph.cpp @@ -559,7 +559,7 @@ void VersionGraph::_add_vertex_to_graph(int64_t vertex_value) { } Status VersionGraph::capture_consistent_versions(const Version& spec_version, - std::vector* version_path) const { + std::vector* version_path) const { if (spec_version.first > spec_version.second) { LOG(WARNING) << "invalid specified version. " << "spec_version=" << spec_version.first << "-" << spec_version.second; diff --git a/be/src/olap/version_graph.h b/be/src/olap/version_graph.h index 3383f3fd6e..7544803d30 100644 --- a/be/src/olap/version_graph.h +++ b/be/src/olap/version_graph.h @@ -48,7 +48,7 @@ public: /// Given a spec_version, this method can find a version path which is the shortest path /// in the graph. The version paths are added to version_path as return info. Status capture_consistent_versions(const Version& spec_version, - std::vector* version_path) const; + std::vector* version_path) const; // See comment of TimestampedVersionTracker's get_orphan_vertex_ratio(); double get_orphan_vertex_ratio(); @@ -157,7 +157,7 @@ public: /// in the graph. The version paths are added to version_path as return info. /// If this version not in main version, version_path can be included expired rowset. Status capture_consistent_versions(const Version& spec_version, - std::vector* version_path) const; + std::vector* version_path) const; /// Capture all expired path version. /// When the last rowset create time of a path greater than expired time which can be expressed @@ -212,4 +212,3 @@ private: }; } // namespace doris - diff --git a/be/src/olap/wrapper_field.h b/be/src/olap/wrapper_field.h index 4663da68d0..021c5d64e7 100644 --- a/be/src/olap/wrapper_field.h +++ b/be/src/olap/wrapper_field.h @@ -115,4 +115,3 @@ private: }; } // namespace doris - diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc index c2b9dc0e7a..4ed5472b7c 100644 --- a/be/src/runtime/buffered_block_mgr2.cc +++ b/be/src/runtime/buffered_block_mgr2.cc @@ -58,7 +58,8 @@ public: const std::shared_ptr& tracker, RuntimeState* state) : _mgr(mgr), _state(state), - _tracker(MemTracker::create_virtual_tracker(-1, "BufferedBlockMgr2::Client", tracker)), + _tracker( + MemTracker::create_virtual_tracker(-1, "BufferedBlockMgr2::Client", tracker)), _num_reserved_buffers(num_reserved_buffers), _num_tmp_reserved_buffers(0), _num_pinned_buffers(0) { diff --git a/be/src/runtime/bufferpool/reservation_tracker.cc b/be/src/runtime/bufferpool/reservation_tracker.cc index b3bd9baf32..405e75eadb 100644 --- a/be/src/runtime/bufferpool/reservation_tracker.cc +++ b/be/src/runtime/bufferpool/reservation_tracker.cc @@ -75,8 +75,7 @@ void ReservationTracker::InitChildTracker(RuntimeProfile* profile, ReservationTr DCHECK_EQ(parent_mem_tracker, mem_tracker_->parent().get()); // Make sure we don't have a lower limit than the ancestor, since we don't enforce // limits at lower links. - DCHECK_EQ(mem_tracker_->get_lowest_limit(), - parent_mem_tracker->get_lowest_limit()); + DCHECK_EQ(mem_tracker_->get_lowest_limit(), parent_mem_tracker->get_lowest_limit()); } else { // Make sure we didn't leave a gap in the links. E.g. this tracker's grandparent // shouldn't have a MemTracker. diff --git a/be/src/runtime/client_cache.cpp b/be/src/runtime/client_cache.cpp index 2e6217df2c..020d46b141 100644 --- a/be/src/runtime/client_cache.cpp +++ b/be/src/runtime/client_cache.cpp @@ -148,9 +148,11 @@ void ClientCacheHelper::release_client(void** client_key) { DCHECK(client_map_entry != _client_map.end()); client_to_close = client_map_entry->second; - auto cache_list = _client_cache.find(make_network_address(client_to_close->ipaddress(), client_to_close->port())); + auto cache_list = _client_cache.find( + make_network_address(client_to_close->ipaddress(), client_to_close->port())); DCHECK(cache_list != _client_cache.end()); - if (_max_cache_size_per_host >= 0 && cache_list->second.size() >= _max_cache_size_per_host) { + if (_max_cache_size_per_host >= 0 && + cache_list->second.size() >= _max_cache_size_per_host) { // cache of this host is full, close this client connection and remove if from _client_map _client_map.erase(*client_key); } else { @@ -185,7 +187,8 @@ void ClientCacheHelper::close_connections(const TNetworkAddress& hostport) { return; } - VLOG_RPC << "Invalidating all " << cache_entry->second.size() << " clients for: " << hostport; + VLOG_RPC << "Invalidating all " << cache_entry->second.size() + << " clients for: " << hostport; for (void* client_key : cache_entry->second) { auto client_map_entry = _client_map.find(client_key); DCHECK(client_map_entry != _client_map.end()); diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp index f115db2cba..f3f30e3cbe 100644 --- a/be/src/runtime/collection_value.cpp +++ b/be/src/runtime/collection_value.cpp @@ -26,12 +26,9 @@ namespace doris { -using AllocateMemFunc = std::function; -static Status init_collection( - CollectionValue* value, - const AllocateMemFunc& allocate, - uint32_t size, - PrimitiveType child_type); +using AllocateMemFunc = std::function; +static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate, + uint32_t size, PrimitiveType child_type); int sizeof_type(PrimitiveType type) { switch (type) { @@ -106,7 +103,7 @@ size_t CollectionValue::get_byte_size(const TypeDescriptor& type) const { const auto& item_type = type.children[0]; result += _length * item_type.get_slot_size(); if (item_type.is_string_type()) { - for (int i = 0; i < _length; ++ i) { + for (int i = 0; i < _length; ++i) { if (is_null_at(i)) { continue; } @@ -115,12 +112,13 @@ size_t CollectionValue::get_byte_size(const TypeDescriptor& type) const { result += item->len; } } else if (item_type.type == TYPE_ARRAY) { - for (int i = 0; i < _length; ++ i) { + for (int i = 0; i < _length; ++i) { if (is_null_at(i)) { continue; } int item_offset = i * item_type.get_slot_size(); - CollectionValue* item = reinterpret_cast(((uint8_t*)_data) + item_offset); + CollectionValue* item = + reinterpret_cast(((uint8_t*)_data) + item_offset); result += item->get_byte_size(item_type); } } @@ -133,18 +131,13 @@ ArrayIterator CollectionValue::iterator(PrimitiveType children_type) const { Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, CollectionValue* value) { - return doris::init_collection(value, [pool](size_t size) -> uint8_t* { - return pool->add_array(new uint8_t[size]); - }, - size, child_type - ); + return doris::init_collection( + value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); }, + size, child_type); } -static Status init_collection( - CollectionValue* value, - const AllocateMemFunc& allocate, - uint32_t size, - PrimitiveType child_type) { +static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate, + uint32_t size, PrimitiveType child_type) { if (value == nullptr) { return Status::InvalidArgument("collection value is null"); } @@ -167,20 +160,14 @@ static Status init_collection( Status CollectionValue::init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, CollectionValue* value) { - return doris::init_collection(value, [pool](size_t size) { - return pool->allocate(size); - }, - size, child_type - ); + return doris::init_collection( + value, [pool](size_t size) { return pool->allocate(size); }, size, child_type); } Status CollectionValue::init_collection(FunctionContext* context, uint32_t size, PrimitiveType child_type, CollectionValue* value) { - return doris::init_collection(value, [context](size_t size) { - return context->allocate(size); - }, - size, child_type - ); + return doris::init_collection( + value, [context](size_t size) { return context->allocate(size); }, size, child_type); } CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { @@ -190,11 +177,10 @@ CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { // Deep copy collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). -void CollectionValue::deep_copy_collection( - CollectionValue* shallow_copied_cv, - const TypeDescriptor& item_type, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs) { +void CollectionValue::deep_copy_collection(CollectionValue* shallow_copied_cv, + const TypeDescriptor& item_type, + const GenMemFootprintFunc& gen_mem_footprint, + bool convert_ptrs) { CollectionValue* cv = shallow_copied_cv; if (cv->length() == 0) { return; @@ -231,17 +217,15 @@ void CollectionValue::deep_copy_collection( // Deep copy items in collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). -void CollectionValue::deep_copy_items_in_collection( - CollectionValue* shallow_copied_cv, - char* base, - const TypeDescriptor& item_type, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs) { +void CollectionValue::deep_copy_items_in_collection(CollectionValue* shallow_copied_cv, char* base, + const TypeDescriptor& item_type, + const GenMemFootprintFunc& gen_mem_footprint, + bool convert_ptrs) { int nulls_size = shallow_copied_cv->has_null() ? shallow_copied_cv->length() : 0; char* item_base = base + nulls_size; if (item_type.is_string_type()) { // when itemtype is string, copy every string item - for (int i = 0; i < shallow_copied_cv->length(); ++ i) { + for (int i = 0; i < shallow_copied_cv->length(); ++i) { if (shallow_copied_cv->is_null_at(i)) { continue; } @@ -256,7 +240,7 @@ void CollectionValue::deep_copy_items_in_collection( } } } else if (item_type.type == TYPE_ARRAY) { - for (int i = 0; i < shallow_copied_cv->length(); ++ i) { + for (int i = 0; i < shallow_copied_cv->length(); ++i) { if (shallow_copied_cv->is_null_at(i)) { continue; } @@ -267,10 +251,8 @@ void CollectionValue::deep_copy_items_in_collection( } } -void CollectionValue::deserialize_collection( - CollectionValue* cv, - const char* tuple_data, - const TypeDescriptor& type) { +void CollectionValue::deserialize_collection(CollectionValue* cv, const char* tuple_data, + const TypeDescriptor& type) { if (cv->length() == 0) { new (cv) CollectionValue(cv->length()); return; @@ -291,8 +273,8 @@ void CollectionValue::deserialize_collection( continue; } - StringValue* dst_item_v = convert_to( - (uint8_t*)cv->data() + i * item_type.get_slot_size()); + StringValue* dst_item_v = + convert_to((uint8_t*)cv->data() + i * item_type.get_slot_size()); if (dst_item_v->len != 0) { int offset = convert_to(dst_item_v->ptr); @@ -305,8 +287,8 @@ void CollectionValue::deserialize_collection( continue; } - CollectionValue* item_cv = convert_to( - (uint8_t*)cv->data() + i * item_type.get_slot_size()); + CollectionValue* item_cv = convert_to((uint8_t*)cv->data() + + i * item_type.get_slot_size()); deserialize_collection(item_cv, tuple_data, item_type); } } @@ -333,7 +315,8 @@ Status CollectionValue::set(uint32_t i, PrimitiveType type, const AnyVal* value) *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; break; case TYPE_SMALLINT: - *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; + *reinterpret_cast(iter.value()) = + reinterpret_cast(value)->val; break; case TYPE_INT: *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h index 7aa14a955f..3ba76de3fa 100644 --- a/be/src/runtime/collection_value.h +++ b/be/src/runtime/collection_value.h @@ -28,7 +28,7 @@ namespace doris { using doris_udf::AnyVal; using MemFootprint = std::pair; -using GenMemFootprintFunc = std::function; +using GenMemFootprintFunc = std::function; struct TypeDescriptor; class ArrayIterator; @@ -59,9 +59,7 @@ public: CollectionValue(void* data, uint32_t length, bool has_null, bool* null_signs) : _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {} - bool is_null_at(uint32_t index) const { - return this->_has_null && this->_null_signs[index]; - } + bool is_null_at(uint32_t index) const { return this->_has_null && this->_null_signs[index]; } void to_collection_val(CollectionVal* val) const; @@ -100,26 +98,21 @@ public: // Deep copy collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). - static void deep_copy_collection( - CollectionValue* shallow_copied_cv, - const TypeDescriptor& item_type, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs); + static void deep_copy_collection(CollectionValue* shallow_copied_cv, + const TypeDescriptor& item_type, + const GenMemFootprintFunc& gen_mem_footprint, + bool convert_ptrs); // Deep copy items in collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). - static void deep_copy_items_in_collection( - CollectionValue* shallow_copied_cv, - char* base, - const TypeDescriptor& item_type, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs); + static void deep_copy_items_in_collection(CollectionValue* shallow_copied_cv, char* base, + const TypeDescriptor& item_type, + const GenMemFootprintFunc& gen_mem_footprint, + bool convert_ptrs); - static void deserialize_collection( - CollectionValue* cv, - const char* tuple_data, - const TypeDescriptor& type); + static void deserialize_collection(CollectionValue* cv, const char* tuple_data, + const TypeDescriptor& type); const void* data() const { return _data; } bool has_null() const { return _has_null; } diff --git a/be/src/runtime/data_stream_mgr.cpp b/be/src/runtime/data_stream_mgr.cpp index f5f7691075..ca09a8700d 100644 --- a/be/src/runtime/data_stream_mgr.cpp +++ b/be/src/runtime/data_stream_mgr.cpp @@ -72,9 +72,9 @@ shared_ptr DataStreamMgr::create_recvr( DCHECK(profile != nullptr); VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id << ", node=" << dest_node_id; - shared_ptr recvr(new DataStreamRecvr( - this, row_desc, fragment_instance_id, dest_node_id, - num_senders, is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); + shared_ptr recvr( + new DataStreamRecvr(this, row_desc, fragment_instance_id, dest_node_id, num_senders, + is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id); lock_guard l(_lock); _fragment_stream_set.insert(std::make_pair(fragment_instance_id, dest_node_id)); diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp index abbd17b34f..9914fb99d9 100644 --- a/be/src/runtime/data_stream_sender.cpp +++ b/be/src/runtime/data_stream_sender.cpp @@ -151,8 +151,8 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { if (_parent->_transfer_data_by_brpc_attachment && _brpc_request.has_row_batch()) { request_row_batch_transfer_attachment>(&_brpc_request, _parent->_tuple_data_buffer, - _closure); + RefCountClosure>( + &_brpc_request, _parent->_tuple_data_buffer, _closure); } _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure); if (batch != nullptr) { @@ -276,9 +276,8 @@ DataStreamSender::DataStreamSender(ObjectPool* pool, int sender_id, const RowDes _bytes_sent_counter(nullptr), _local_bytes_send_counter(nullptr), _transfer_data_by_brpc_attachment(config::transfer_data_by_brpc_attachment) { - if (_transfer_data_by_brpc_attachment) { - _tuple_data_buffer_ptr = &_tuple_data_buffer; + _tuple_data_buffer_ptr = &_tuple_data_buffer; } } @@ -300,9 +299,8 @@ DataStreamSender::DataStreamSender(ObjectPool* pool, int sender_id, const RowDes _ignore_not_found(sink.__isset.ignore_not_found ? sink.ignore_not_found : true), _dest_node_id(sink.dest_node_id), _transfer_data_by_brpc_attachment(config::transfer_data_by_brpc_attachment) { - if (_transfer_data_by_brpc_attachment) { - _tuple_data_buffer_ptr = &_tuple_data_buffer; + _tuple_data_buffer_ptr = &_tuple_data_buffer; } DCHECK_GT(destinations.size(), 0); @@ -391,8 +389,8 @@ Status DataStreamSender::prepare(RuntimeState* state) { _profile = _pool->add(new RuntimeProfile(title.str())); SCOPED_TIMER(_profile->total_time_counter()); _mem_tracker = MemTracker::create_tracker( - -1, "DataStreamSender:" + print_id(state->fragment_instance_id()), - nullptr, MemTrackerLevel::VERBOSE, _profile); + -1, "DataStreamSender:" + print_id(state->fragment_instance_id()), nullptr, + MemTrackerLevel::VERBOSE, _profile); SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) { @@ -673,7 +671,8 @@ Status DataStreamSender::serialize_batch(RowBatch* src, PRowBatch* dest, int num { SCOPED_TIMER(_serialize_batch_timer); size_t uncompressed_bytes = 0, compressed_bytes = 0; - RETURN_IF_ERROR(src->serialize(dest, &uncompressed_bytes, &compressed_bytes, _tuple_data_buffer_ptr)); + RETURN_IF_ERROR(src->serialize(dest, &uncompressed_bytes, &compressed_bytes, + _tuple_data_buffer_ptr)); COUNTER_UPDATE(_bytes_sent_counter, compressed_bytes * num_receivers); COUNTER_UPDATE(_uncompressed_bytes_counter, uncompressed_bytes * num_receivers); } diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index 6ae858f0da..47a46ac377 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -566,9 +566,9 @@ public: private: // Used to make sure sizeof DateTimeValue friend class UnusedClass; - friend void doris::vectorized::VecDateTimeValue::convert_vec_dt_to_dt(DateTimeValue* dt); + friend void doris::vectorized::VecDateTimeValue::convert_vec_dt_to_dt(DateTimeValue* dt); friend void doris::vectorized::VecDateTimeValue::convert_dt_to_vec_dt(DateTimeValue* dt); - + void from_packed_time(int64_t packed_time) { _microsecond = packed_time % (1LL << 24); int64_t ymdhms = packed_time >> 24; diff --git a/be/src/runtime/decimalv2_value.cpp b/be/src/runtime/decimalv2_value.cpp index 36c66ed360..4eef5e1558 100644 --- a/be/src/runtime/decimalv2_value.cpp +++ b/be/src/runtime/decimalv2_value.cpp @@ -238,7 +238,8 @@ DecimalV2Value& DecimalV2Value::operator+=(const DecimalV2Value& other) { // Solve a one-dimensional quadratic equation: ax2 + bx + c =0 // Reference: https://gist.github.com/miloyip/1fcc1859c94d33a01957cf41a7c25fdf // Reference: https://www.zhihu.com/question/51381686 -static std::pair quadratic_equation_naive(__uint128_t a, __uint128_t b, __uint128_t c) { +static std::pair quadratic_equation_naive(__uint128_t a, __uint128_t b, + __uint128_t c) { __uint128_t dis = b * b - 4 * a * c; // assert(dis >= 0); // not handling complex root @@ -252,15 +253,18 @@ static std::pair quadratic_equation_naive(__uint128_t a, __uint1 } static inline double sgn(double x) { - if (x > 0) return 1; - else if (x < 0) return -1; - else return 0; + if (x > 0) + return 1; + else if (x < 0) + return -1; + else + return 0; } -// In the above quadratic_equation_naive solution process, we found that -b + sqrtdis will -// get the correct answer, and -b-sqrtdis will get the wrong answer. For two close floating-point +// In the above quadratic_equation_naive solution process, we found that -b + sqrtdis will +// get the correct answer, and -b-sqrtdis will get the wrong answer. For two close floating-point // decimals a, b, a-b will cause larger errors than a + b, which is called catastrophic cancellation. -// Both -b and sqrtdis are positive numbers. We can first find the roots brought by -b + sqrtdis, +// Both -b and sqrtdis are positive numbers. We can first find the roots brought by -b + sqrtdis, // and then use the product of the two roots of the quadratic equation in one unknown to find another root static std::pair quadratic_equation_better(int128_t a, int128_t b, int128_t c) { if (b == 0) return quadratic_equation_naive(a, b, c); @@ -269,14 +273,14 @@ static std::pair quadratic_equation_better(int128_t a, int128_t // not handling complex root if (dis < 0) return std::make_pair(0, 0); - // There may be a loss of precision, but here is used to find the mantissa of the square root. - // The current SCALE=9, which is less than the 15 significant digits of the double type, + // There may be a loss of precision, but here is used to find the mantissa of the square root. + // The current SCALE=9, which is less than the 15 significant digits of the double type, // so theoretically the loss of precision will not be reflected in the result. double sqrtdis = std::sqrt(static_cast(dis)); double a_r = static_cast(a); double b_r = static_cast(b); double c_r = static_cast(c); - // Here b comes from an unsigned integer, and sgn(b) is always 1, + // Here b comes from an unsigned integer, and sgn(b) is always 1, // which is only used to preserve the complete algorithm double x1 = (-b_r - sgn(b_r) * sqrtdis) / (a_r + a_r); double x2 = c_r / (a_r * x1); @@ -284,49 +288,50 @@ static std::pair quadratic_equation_better(int128_t a, int128_t } // Large integer square roots, returns the integer part. -// The time complexity is lower than the traditional dichotomy +// The time complexity is lower than the traditional dichotomy // and Newton iteration method, and the number of iterations is fixed. -// in real-time systems, functions that execute an unpredictable number of iterations +// in real-time systems, functions that execute an unpredictable number of iterations // will make the total time per task unpredictable, and introduce jitter // Reference: https://www.embedded.com/integer-square-roots/ -// Reference: https://link.zhihu.com/?target=https%3A//gist.github.com/miloyip/69663b78b26afa0dcc260382a6034b1a -// Reference: https://www.zhihu.com/question/35122102 +// Reference: https://link.zhihu.com/?target=https%3A//gist.github.com/miloyip/69663b78b26afa0dcc260382a6034b1a +// Reference: https://www.zhihu.com/question/35122102 static std::pair<__uint128_t, __uint128_t> sqrt_integer(__uint128_t n) { __uint128_t remainder = 0, root = 0; for (size_t i = 0; i < 64; i++) { root <<= 1; ++root; remainder <<= 2; - remainder |= n >> 126; n <<= 2; // Extract 2 MSB from n + remainder |= n >> 126; + n <<= 2; // Extract 2 MSB from n if (root <= remainder) { remainder -= root; ++root; - } - else{ + } else { --root; } } return std::make_pair(root >>= 1, remainder); } -// According to the integer part and the remainder of the square root, +// According to the integer part and the remainder of the square root, // Use one-dimensional quadratic equation to solve the fractional part of the square root static double sqrt_fractional(int128_t sqrt_int, int128_t remainder) { - std::pair p = quadratic_equation_better(1, 2*sqrt_int, -remainder); - if ((0 < p.first) && (p.first < 1)) return p.first; - if ((0 < p.second) && (p.second < 1)) return p.second; + std::pair p = quadratic_equation_better(1, 2 * sqrt_int, -remainder); + if ((0 < p.first) && (p.first < 1)) return p.first; + if ((0 < p.second) && (p.second < 1)) return p.second; return 0; } -const int128_t DecimalV2Value::SQRT_MOLECULAR_MAGNIFICATION = get_scale_base(PRECISION/2); -const int128_t DecimalV2Value::SQRT_DENOMINATOR = std::sqrt(ONE_BILLION) * get_scale_base(PRECISION/2 - SCALE); +const int128_t DecimalV2Value::SQRT_MOLECULAR_MAGNIFICATION = get_scale_base(PRECISION / 2); +const int128_t DecimalV2Value::SQRT_DENOMINATOR = + std::sqrt(ONE_BILLION) * get_scale_base(PRECISION / 2 - SCALE); DecimalV2Value DecimalV2Value::sqrt(const DecimalV2Value& v) { int128_t x = v.value(); std::pair<__uint128_t, __uint128_t> sqrt_integer_ret; bool is_negative = (x < 0); if (x == 0) { - return DecimalV2Value(0); + return DecimalV2Value(0); } sqrt_integer_ret = sqrt_integer(abs(x)); int128_t integer_root = static_cast(sqrt_integer_ret.first); @@ -336,10 +341,11 @@ DecimalV2Value DecimalV2Value::sqrt(const DecimalV2Value& v) { // Multiplying by SQRT_MOLECULAR_MAGNIFICATION here will not overflow, // because integer_root can be up to 64 bits. int128_t molecular_integer = integer_root * SQRT_MOLECULAR_MAGNIFICATION; - int128_t molecular_fractional = static_cast(fractional * SQRT_MOLECULAR_MAGNIFICATION); - int128_t ret = (molecular_integer + molecular_fractional)/SQRT_DENOMINATOR; + int128_t molecular_fractional = + static_cast(fractional * SQRT_MOLECULAR_MAGNIFICATION); + int128_t ret = (molecular_integer + molecular_fractional) / SQRT_DENOMINATOR; if (is_negative) ret = -ret; - return DecimalV2Value(ret); + return DecimalV2Value(ret); } int DecimalV2Value::parse_from_str(const char* decimal_str, int32_t length) { @@ -388,7 +394,7 @@ std::string DecimalV2Value::to_string(int scale) const { } else { auto f_frac = fmt::format_int(frac_val); if (f_frac.size() < scale) { - str.append(scale - f_frac.size(), '0'); + str.append(scale - f_frac.size(), '0'); } str.append(f_frac.data(), f_frac.size()); } diff --git a/be/src/runtime/decimalv2_value.h b/be/src/runtime/decimalv2_value.h index 4d0e69fd01..45032e48da 100644 --- a/be/src/runtime/decimalv2_value.h +++ b/be/src/runtime/decimalv2_value.h @@ -61,8 +61,8 @@ public: static constexpr int32_t PRECISION = 27; static constexpr int32_t SCALE = 9; - static constexpr int32_t SCALE_TRIM_ARRAY[SCALE + 1] = - { 1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 }; + static constexpr int32_t SCALE_TRIM_ARRAY[SCALE + 1] = { + 1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; static constexpr uint32_t ONE_BILLION = 1000000000; static constexpr int64_t MAX_INT_VALUE = 999999999999999999; static constexpr int32_t MAX_FRAC_VALUE = 999999999; diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 08f3293fa7..6c856f719b 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -148,8 +148,7 @@ std::string BrokerTableDescriptor::debug_string() const { return out.str(); } -HiveTableDescriptor::HiveTableDescriptor(const TTableDescriptor& tdesc) - : TableDescriptor(tdesc) {} +HiveTableDescriptor::HiveTableDescriptor(const TTableDescriptor& tdesc) : TableDescriptor(tdesc) {} HiveTableDescriptor::~HiveTableDescriptor() {} diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index b3cb0e34a3..884a2a5379 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -60,16 +60,16 @@ class PSlotDescriptor; // This is more efficient than branching to check if the slot is non-nullable. struct NullIndicatorOffset { int byte_offset; - uint8_t bit_mask; // to extract null indicator - int8_t bit_offset; // only used to serialize, from 1 to 8, invalid null value - // bit_offset is -1. + uint8_t bit_mask; // to extract null indicator + int8_t bit_offset; // only used to serialize, from 1 to 8, invalid null value + // bit_offset is -1. NullIndicatorOffset(int byte_offset, int bit_offset_) : byte_offset(byte_offset), bit_mask(bit_offset_ == -1 ? 0 : 1 << (7 - bit_offset_)), bit_offset(bit_offset_) { - DCHECK_LE(bit_offset_, 8); - } + DCHECK_LE(bit_offset_, 8); + } bool equals(const NullIndicatorOffset& o) const { return this->byte_offset == o.byte_offset && this->bit_mask == o.bit_mask; diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc index 1f65851809..892ce1512c 100644 --- a/be/src/runtime/disk_io_mgr.cc +++ b/be/src/runtime/disk_io_mgr.cc @@ -284,8 +284,7 @@ DiskIoMgr::DiskIoMgr() // std::min((uint64_t)config::max_cached_file_handles, FileSystemUtil::max_num_file_handles()), // &HdfsCachedFileHandle::release) { { - _mem_tracker = - MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW); + _mem_tracker = MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW); SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); int64_t max_buffer_size_scaled = bit_ceil(_max_buffer_size, _min_buffer_size); _free_buffers.resize(bit_log2(max_buffer_size_scaled) + 1); @@ -307,8 +306,7 @@ DiskIoMgr::DiskIoMgr(int num_local_disks, int threads_per_disk, int min_buffer_s // _file_handle_cache(::min(config::max_cached_file_handles, // FileSystemUtil::max_num_file_handles()), &HdfsCachedFileHandle::release) { { - _mem_tracker = - MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW); + _mem_tracker = MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW); SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); int64_t max_buffer_size_scaled = bit_ceil(_max_buffer_size, _min_buffer_size); _free_buffers.resize(bit_log2(max_buffer_size_scaled) + 1); @@ -402,8 +400,8 @@ Status DiskIoMgr::init(const int64_t mem_limit) { ss << "work-loop(Disk: " << i << ", Thread: " << j << ")"; // _disk_thread_group.AddThread(new Thread("disk-io-mgr", ss.str(), // &DiskIoMgr::work_loop, this, _disk_queues[i])); - _disk_thread_group.add_thread( - new std::thread(std::bind(&DiskIoMgr::work_loop, this, _disk_queues[i], _mem_tracker))); + _disk_thread_group.add_thread(new std::thread( + std::bind(&DiskIoMgr::work_loop, this, _disk_queues[i], _mem_tracker))); } } _request_context_cache.reset(new RequestContextCache(this)); @@ -739,7 +737,8 @@ char* DiskIoMgr::get_free_buffer(int64_t* buffer_size) { buffer = new char[*buffer_size]; } else { // This means the buffer's memory ownership is transferred from DiskIoMgr to tls tracker. - _mem_tracker->transfer_to(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get(), *buffer_size); + _mem_tracker->transfer_to(tls_ctx()->_thread_mem_tracker_mgr->mem_tracker().get(), + *buffer_size); buffer = _free_buffers[idx].front(); _free_buffers[idx].pop_front(); } diff --git a/be/src/runtime/dpp_sink_internal.cpp b/be/src/runtime/dpp_sink_internal.cpp index 14442553ff..a35297a8ae 100644 --- a/be/src/runtime/dpp_sink_internal.cpp +++ b/be/src/runtime/dpp_sink_internal.cpp @@ -31,7 +31,8 @@ namespace doris { PartRangeKey PartRangeKey::_s_pos_infinite(1); PartRangeKey PartRangeKey::_s_neg_infinite(-1); -PartRange PartRange::_s_all_range(PartRangeKey::neg_infinite(), PartRangeKey::pos_infinite(), true, true); +PartRange PartRange::_s_all_range(PartRangeKey::neg_infinite(), PartRangeKey::pos_infinite(), true, + true); Status PartRangeKey::from_thrift(ObjectPool* pool, const TPartitionKey& t_key, PartRangeKey* key) { key->_sign = t_key.sign; diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 7caf55ac8b..87206099e5 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -234,7 +234,6 @@ private: RoutineLoadTaskExecutor* _routine_load_task_executor = nullptr; SmallFileMgr* _small_file_mgr = nullptr; HeartbeatFlags* _heartbeat_flags = nullptr; - }; template <> diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 8118fd2683..0c9a6edc84 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -189,13 +189,14 @@ Status ExecEnv::_init_mem_tracker() { global_memory_limit_bytes = MemInfo::physical_mem(); } MemTracker::get_process_tracker()->set_limit(global_memory_limit_bytes); - _query_pool_mem_tracker = - MemTracker::create_tracker(global_memory_limit_bytes, "QueryPool", MemTracker::get_process_tracker(), - MemTrackerLevel::OVERVIEW); + _query_pool_mem_tracker = MemTracker::create_tracker(global_memory_limit_bytes, "QueryPool", + MemTracker::get_process_tracker(), + MemTrackerLevel::OVERVIEW); REGISTER_HOOK_METRIC(query_mem_consumption, [this]() { return _query_pool_mem_tracker->consumption(); }); - _load_pool_mem_tracker = MemTracker::create_tracker( - global_memory_limit_bytes, "LoadPool", MemTracker::get_process_tracker(), MemTrackerLevel::OVERVIEW); + _load_pool_mem_tracker = MemTracker::create_tracker(global_memory_limit_bytes, "LoadPool", + MemTracker::get_process_tracker(), + MemTrackerLevel::OVERVIEW); REGISTER_HOOK_METRIC(load_mem_consumption, [this]() { return _load_pool_mem_tracker->consumption(); }); LOG(INFO) << "Using global memory limit: " diff --git a/be/src/runtime/file_result_writer.cpp b/be/src/runtime/file_result_writer.cpp index 32884213f2..52153290d4 100644 --- a/be/src/runtime/file_result_writer.cpp +++ b/be/src/runtime/file_result_writer.cpp @@ -217,7 +217,7 @@ std::string FileResultWriter::_file_format_to_name() { } } std::string FileResultWriter::gen_types() { - std::string types = ""; + std::string types = ""; int num_columns = _output_expr_ctxs.size(); for (int i = 0; i < num_columns; ++i) { types += type_to_string(_output_expr_ctxs[i]->root()->type().type); diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp index 5cdfcb084e..aaf7670535 100644 --- a/be/src/runtime/fold_constant_executor.cpp +++ b/be/src/runtime/fold_constant_executor.cpp @@ -42,8 +42,8 @@ namespace doris { TUniqueId FoldConstantExecutor::_dummy_id; -Status FoldConstantExecutor::fold_constant_expr( - const TFoldConstantParams& params, PConstantExprResult* response) { +Status FoldConstantExecutor::fold_constant_expr(const TFoldConstantParams& params, + PConstantExprResult* response) { SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); const auto& expr_map = params.expr_map; auto expr_result_map = response->mutable_expr_result_map(); @@ -99,8 +99,8 @@ Status FoldConstantExecutor::fold_constant_expr( return Status::OK(); } -Status FoldConstantExecutor::fold_constant_vexpr( - const TFoldConstantParams& params, PConstantExprResult* response) { +Status FoldConstantExecutor::fold_constant_vexpr(const TFoldConstantParams& params, + PConstantExprResult* response) { const auto& expr_map = params.expr_map; auto expr_result_map = response->mutable_expr_result_map(); @@ -128,7 +128,7 @@ Status FoldConstantExecutor::fold_constant_vexpr( vectorized::Block tmp_block; tmp_block.insert({vectorized::ColumnUInt8::create(1), - std::make_shared(), ""}); + std::make_shared(), ""}); int result_column = -1; // calc vexpr RETURN_IF_ERROR(ctx->execute(&tmp_block, &result_column)); @@ -146,7 +146,8 @@ Status FoldConstantExecutor::fold_constant_vexpr( } else { expr_result.set_success(true); auto string_ref = column_ptr->get_data_at(0); - result = _get_result((void*)string_ref.data, string_ref.size, ctx->root()->type().type); + result = _get_result((void*)string_ref.data, string_ref.size, + ctx->root()->type().type); } expr_result.set_content(std::move(result)); @@ -174,7 +175,8 @@ Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) { _runtime_state.reset(new RuntimeState(fragment_params.params, query_options, query_globals, ExecEnv::GetInstance())); DescriptorTbl* desc_tbl = nullptr; - Status status = DescriptorTbl::create(_runtime_state->obj_pool(), TDescriptorTable(), &desc_tbl); + Status status = + DescriptorTbl::create(_runtime_state->obj_pool(), TDescriptorTable(), &desc_tbl); if (UNLIKELY(!status.ok())) { LOG(WARNING) << "Failed to create descriptor table, msg: " << status.get_error_msg(); return Status::Uninitialized(status.get_error_msg()); @@ -188,7 +190,8 @@ Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) { _runtime_profile = _runtime_state->runtime_profile(); _runtime_profile->set_name("FoldConstantExpr"); - _mem_tracker = MemTracker::create_tracker(-1, "FoldConstantExpr", _runtime_state->instance_mem_tracker()); + _mem_tracker = MemTracker::create_tracker(-1, "FoldConstantExpr", + _runtime_state->instance_mem_tracker()); _mem_pool.reset(new MemPool(_mem_tracker.get())); return Status::OK(); @@ -201,7 +204,7 @@ Status FoldConstantExecutor::_prepare_and_open(Context* ctx) { } template -string FoldConstantExecutor::_get_result(void* src, size_t size, PrimitiveType slot_type){ +string FoldConstantExecutor::_get_result(void* src, size_t size, PrimitiveType slot_type) { switch (slot_type) { case TYPE_BOOLEAN: { bool val = *reinterpret_cast(src); @@ -253,7 +256,7 @@ string FoldConstantExecutor::_get_result(void* src, size_t size, PrimitiveType s date_value->to_string(str); return str; } else { - const DateTimeValue date_value = *reinterpret_cast(src); + const DateTimeValue date_value = *reinterpret_cast(src); char str[MAX_DTVALUE_STR_LEN]; date_value.to_string(str); return str; @@ -268,6 +271,4 @@ string FoldConstantExecutor::_get_result(void* src, size_t size, PrimitiveType s } } - -} - +} // namespace doris diff --git a/be/src/runtime/large_int_value.h b/be/src/runtime/large_int_value.h index a7389d326a..090331bd02 100644 --- a/be/src/runtime/large_int_value.h +++ b/be/src/runtime/large_int_value.h @@ -41,9 +41,7 @@ public: return fmt::format_to(buffer, "{}", value) - buffer; } - static std::string to_string(__int128 value) { - return fmt::format("{}", value); - } + static std::string to_string(__int128 value) { return fmt::format("{}", value); } }; std::ostream& operator<<(std::ostream& os, __int128 const& value); @@ -55,7 +53,11 @@ std::size_t hash_value(LargeIntValue const& value); } // namespace doris // Thirdparty printers like gtest needs operator<< to be exported into global namespace, so that ADL will work. -inline std::ostream& operator<<(std::ostream& os, __int128 const& value) { return doris::operator<<(os, value); } -inline std::istream& operator>>(std::istream& is, __int128& value) { return doris::operator>>(is, value); } +inline std::ostream& operator<<(std::ostream& os, __int128 const& value) { + return doris::operator<<(os, value); +} +inline std::istream& operator>>(std::istream& is, __int128& value) { + return doris::operator>>(is, value); +} #endif diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp index eb8756c81a..ce5448a910 100644 --- a/be/src/runtime/load_channel.cpp +++ b/be/src/runtime/load_channel.cpp @@ -26,10 +26,13 @@ namespace doris { LoadChannel::LoadChannel(const UniqueId& load_id, int64_t mem_limit, int64_t timeout_s, bool is_high_priority, const std::string& sender_ip, bool is_vec) - : _load_id(load_id), _timeout_s(timeout_s), _is_high_priority(is_high_priority), - _sender_ip(sender_ip), _is_vec(is_vec) { - _mem_tracker = MemTracker::create_tracker( - mem_limit, "LoadChannel:" + _load_id.to_string(), nullptr, MemTrackerLevel::TASK); + : _load_id(load_id), + _timeout_s(timeout_s), + _is_high_priority(is_high_priority), + _sender_ip(sender_ip), + _is_vec(is_vec) { + _mem_tracker = MemTracker::create_tracker(mem_limit, "LoadChannel:" + _load_id.to_string(), + nullptr, MemTrackerLevel::TASK); // _last_updated_time should be set before being inserted to // _load_channels in load_channel_mgr, or it may be erased // immediately by gc thread. @@ -39,7 +42,8 @@ LoadChannel::LoadChannel(const UniqueId& load_id, int64_t mem_limit, int64_t tim LoadChannel::~LoadChannel() { LOG(INFO) << "load channel removed. mem peak usage=" << _mem_tracker->peak_consumption() << ", info=" << _mem_tracker->debug_string() << ", load_id=" << _load_id - << ", is high priority=" << _is_high_priority << ", sender_ip=" << _sender_ip << ", is_vec=" << _is_vec; + << ", is high priority=" << _is_high_priority << ", sender_ip=" << _sender_ip + << ", is_vec=" << _is_vec; } Status LoadChannel::open(const PTabletWriterOpenRequest& params) { @@ -66,7 +70,8 @@ Status LoadChannel::open(const PTabletWriterOpenRequest& params) { return Status::OK(); } -Status LoadChannel::_get_tablets_channel(std::shared_ptr& channel, bool& is_finished, const int64_t index_id) { +Status LoadChannel::_get_tablets_channel(std::shared_ptr& channel, + bool& is_finished, const int64_t index_id) { std::lock_guard l(_lock); auto it = _tablets_channels.find(index_id); if (it == _tablets_channels.end()) { @@ -136,4 +141,4 @@ Status LoadChannel::cancel() { return Status::OK(); } -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h index 37ee8453c9..fa88ec20e2 100644 --- a/be/src/runtime/load_channel.h +++ b/be/src/runtime/load_channel.h @@ -48,8 +48,7 @@ public: // this batch must belong to a index in one transaction template - Status add_batch(const TabletWriterAddRequest& request, - TabletWriterAddResult* response); + Status add_batch(const TabletWriterAddRequest& request, TabletWriterAddResult* response); // return true if this load channel has been opened and all tablets channels are closed then. bool is_finished(); @@ -73,19 +72,16 @@ public: bool is_high_priority() const { return _is_high_priority; } protected: - Status _get_tablets_channel(std::shared_ptr& channel, - bool& is_finished, + Status _get_tablets_channel(std::shared_ptr& channel, bool& is_finished, const int64_t index_id); - - template - Status _handle_eos(std::shared_ptr& channel, - const Request& request, + + template + Status _handle_eos(std::shared_ptr& channel, const Request& request, Response* response) { bool finished = false; auto index_id = request.index_id(); - RETURN_IF_ERROR(channel->close(request.sender_id(), request.backend_id(), - &finished, request.partition_ids(), - response->mutable_tablet_vec())); + RETURN_IF_ERROR(channel->close(request.sender_id(), request.backend_id(), &finished, + request.partition_ids(), response->mutable_tablet_vec())); if (finished) { std::lock_guard l(_lock); _tablets_channels.erase(index_id); @@ -94,7 +90,6 @@ protected: return Status::OK(); } - private: // when mem consumption exceeds limit, should call this method to find the channel // that consumes the largest memory(, and then we can reduce its memory usage). @@ -169,8 +164,8 @@ Status LoadChannel::add_batch(const TabletWriterAddRequest& request, inline std::ostream& operator<<(std::ostream& os, const LoadChannel& load_channel) { os << "LoadChannel(id=" << load_channel.load_id() << ", mem=" << load_channel.mem_consumption() - << ", last_update_time=" << static_cast(load_channel.last_updated_time()) - << ", is high priority: " << load_channel.is_high_priority() << ")"; + << ", last_update_time=" << static_cast(load_channel.last_updated_time()) + << ", is high priority: " << load_channel.is_high_priority() << ")"; return os; } diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp index 35f7c3b82f..7b2ee5bb26 100644 --- a/be/src/runtime/load_channel_mgr.cpp +++ b/be/src/runtime/load_channel_mgr.cpp @@ -88,14 +88,16 @@ Status LoadChannelMgr::init(int64_t process_mem_limit) { MemTracker::get_process_tracker(), MemTrackerLevel::OVERVIEW); SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); - REGISTER_HOOK_METRIC(load_channel_mem_consumption, [this]() { return _mem_tracker->consumption(); }); + REGISTER_HOOK_METRIC(load_channel_mem_consumption, + [this]() { return _mem_tracker->consumption(); }); _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024); RETURN_IF_ERROR(_start_bg_worker()); return Status::OK(); } -LoadChannel* LoadChannelMgr::_create_load_channel(const UniqueId& load_id, int64_t mem_limit, int64_t timeout_s, - bool is_high_priority, const std::string& sender_ip, bool is_vec) { +LoadChannel* LoadChannelMgr::_create_load_channel(const UniqueId& load_id, int64_t mem_limit, + int64_t timeout_s, bool is_high_priority, + const std::string& sender_ip, bool is_vec) { return new LoadChannel(load_id, mem_limit, timeout_s, is_high_priority, sender_ip, is_vec); } @@ -119,8 +121,9 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) { int64_t job_timeout_s = calc_job_timeout_s(timeout_in_req_s); bool is_high_priority = (params.has_is_high_priority() && params.is_high_priority()); - channel.reset(_create_load_channel(load_id, job_max_memory, job_timeout_s, is_high_priority, - params.sender_ip(), params.is_vectorized())); + channel.reset(_create_load_channel(load_id, job_max_memory, job_timeout_s, + is_high_priority, params.sender_ip(), + params.is_vectorized())); _load_channels.insert({load_id, channel}); } } @@ -136,8 +139,7 @@ void LoadChannelMgr::_finish_load_channel(const UniqueId load_id) { { std::lock_guard l(_lock); _load_channels.erase(load_id); - auto handle = - _last_success_channel->insert(load_id.to_string(), nullptr, 1, dummy_deleter); + auto handle = _last_success_channel->insert(load_id.to_string(), nullptr, 1, dummy_deleter); _last_success_channel->release(handle); } VLOG_CRITICAL << "removed load channel " << load_id; diff --git a/be/src/runtime/load_channel_mgr.h b/be/src/runtime/load_channel_mgr.h index 292f3776e8..c9e84019b1 100644 --- a/be/src/runtime/load_channel_mgr.h +++ b/be/src/runtime/load_channel_mgr.h @@ -53,17 +53,17 @@ public: Status open(const PTabletWriterOpenRequest& request); template - Status add_batch(const TabletWriterAddRequest& request, - TabletWriterAddResult* response); + Status add_batch(const TabletWriterAddRequest& request, TabletWriterAddResult* response); // cancel all tablet stream for 'load_id' load Status cancel(const PTabletWriterCancelRequest& request); private: - static LoadChannel* _create_load_channel(const UniqueId& load_id, int64_t mem_limit, int64_t timeout_s, - bool is_high_priority, const std::string& sender_ip, bool is_vec); + static LoadChannel* _create_load_channel(const UniqueId& load_id, int64_t mem_limit, + int64_t timeout_s, bool is_high_priority, + const std::string& sender_ip, bool is_vec); - template + template Status _get_load_channel(std::shared_ptr& channel, bool& is_eof, const UniqueId& load_id, const Request& request); @@ -90,11 +90,9 @@ protected: Status _start_load_channels_clean(); }; -template -Status LoadChannelMgr::_get_load_channel(std::shared_ptr& channel, - bool& is_eof, - const UniqueId& load_id, - const Request& request) { +template +Status LoadChannelMgr::_get_load_channel(std::shared_ptr& channel, bool& is_eof, + const UniqueId& load_id, const Request& request) { is_eof = false; std::lock_guard l(_lock); auto it = _load_channels.find(load_id); @@ -147,5 +145,4 @@ Status LoadChannelMgr::add_batch(const TabletWriterAddRequest& request, return Status::OK(); } - } // namespace doris diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h index c62f2bdf6a..fc17854f48 100644 --- a/be/src/runtime/mem_pool.h +++ b/be/src/runtime/mem_pool.h @@ -274,7 +274,7 @@ private: template Status ALWAYS_INLINE allocate_safely(int64_t size, int alignment, uint8_t*& ret, - Status* rst = nullptr) { + Status* rst = nullptr) { uint8_t* result = allocate(size, alignment, rst); if (result == nullptr) { return Status::OLAPInternalError(OLAP_ERR_MALLOC_ERROR); diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp index 49f8862b0b..f42de2ef59 100644 --- a/be/src/runtime/mem_tracker.cpp +++ b/be/src/runtime/mem_tracker.cpp @@ -65,7 +65,8 @@ static std::shared_ptr brpc_server_tracker; static GoogleOnceType brpc_server_tracker_once = GOOGLE_ONCE_INIT; void MemTracker::create_brpc_server_tracker() { - brpc_server_tracker = MemTracker::create_tracker(-1, "Brpc", get_process_tracker(), MemTrackerLevel::OVERVIEW); + brpc_server_tracker = MemTracker::create_tracker(-1, "Brpc", get_process_tracker(), + MemTrackerLevel::OVERVIEW); } std::shared_ptr MemTracker::get_brpc_server_tracker() { diff --git a/be/src/runtime/minidump.cpp b/be/src/runtime/minidump.cpp index 7d527db020..bdcd6e9fa3 100644 --- a/be/src/runtime/minidump.cpp +++ b/be/src/runtime/minidump.cpp @@ -37,8 +37,7 @@ struct FileStat { std::string abs_path; time_t create_time; - FileStat(const std::string& path_, time_t ctime) - : abs_path(path_), create_time(ctime) {} + FileStat(const std::string& path_, time_t ctime) : abs_path(path_), create_time(ctime) {} }; Status Minidump::init() { @@ -46,7 +45,7 @@ Status Minidump::init() { LOG(INFO) << "minidump is disabled"; return Status::OK(); } - + // 1. create minidump dir RETURN_IF_ERROR(FileUtils::create_dir(config::minidump_dir)); @@ -55,14 +54,15 @@ Status Minidump::init() { if (config::max_minidump_file_size_mb > 0) { minidump_descriptor.set_size_limit(config::max_minidump_file_size_mb * 1024 * 1024); } - _error_handler.reset(new google_breakpad::ExceptionHandler(minidump_descriptor, nullptr, _minidump_cb, nullptr, true, -1)); + _error_handler.reset(new google_breakpad::ExceptionHandler(minidump_descriptor, nullptr, + _minidump_cb, nullptr, true, -1)); // 3. setup sig handler - _setup_sig_handler(); + _setup_sig_handler(); RETURN_IF_ERROR(Thread::create( - "Minidump", "minidump_clean_thread", - [this]() { this->_clean_old_minidump(); }, &_clean_thread)); + "Minidump", "minidump_clean_thread", [this]() { this->_clean_old_minidump(); }, + &_clean_thread)); LOG(INFO) << "Minidump is enabled. dump file will be saved at " << config::minidump_dir; return Status::OK(); @@ -76,7 +76,8 @@ Status Minidump::_setup_sig_handler() { sig_action.sa_flags = SA_SIGINFO; // use sa_sigaction instead of sa_handler sig_action.sa_sigaction = &(this->_usr1_sigaction); if (sigaction(_signo, &sig_action, nullptr) == -1) { - return Status::InternalError("failed to install signal handler for " + std::to_string(_signo)); + return Status::InternalError("failed to install signal handler for " + + std::to_string(_signo)); } return Status::OK(); } @@ -87,8 +88,8 @@ void Minidump::_usr1_sigaction(int signum, siginfo_t* info, void* context) { _error_handler->WriteMinidump(); } -bool Minidump::_minidump_cb(const google_breakpad::MinidumpDescriptor& descriptor, - void* context, bool succeeded) { +bool Minidump::_minidump_cb(const google_breakpad::MinidumpDescriptor& descriptor, void* context, + bool succeeded) { // use sys_write supported by `linux syscall`, recommended by breakpad doc. const char* msg = "Minidump created at: "; sys_write(STDOUT_FILENO, msg, strlen(msg)); @@ -112,7 +113,7 @@ void Minidump::stop() { } void Minidump::_clean_old_minidump() { - while(!_stop) { + while (!_stop) { sleep(10); if (config::max_minidump_file_number <= 0) { continue; @@ -137,15 +138,16 @@ void Minidump::_clean_old_minidump() { std::vector stats; for (auto it = files.begin(); it != files.end(); ++it) { std::string path = config::minidump_dir + "/" + *it; - + struct stat buf; if ((ret = stat(path.c_str(), &buf)) != 0) { - LOG(WARNING) << "Failed to stat minidump file: " << path << ", remote it. errno: " << ret; - FileUtils::remove(path); + LOG(WARNING) << "Failed to stat minidump file: " << path + << ", remote it. errno: " << ret; + FileUtils::remove(path); continue; } - stats.emplace_back(path, buf.st_ctime); + stats.emplace_back(path, buf.st_ctime); } // sort file by ctime ascending @@ -156,7 +158,7 @@ void Minidump::_clean_old_minidump() { return true; } }); - + int to_delete = stats.size() - config::max_minidump_file_number; int deleted = 0; for (auto it = stats.begin(); it != stats.end() && deleted < to_delete; it++, deleted++) { diff --git a/be/src/runtime/minidump.h b/be/src/runtime/minidump.h index 78e836cd4d..026dbc1e01 100644 --- a/be/src/runtime/minidump.h +++ b/be/src/runtime/minidump.h @@ -41,11 +41,11 @@ public: private: // The callback after writing the minidump file - static bool _minidump_cb(const google_breakpad::MinidumpDescriptor& descriptor, - void* context, bool succeeded); + static bool _minidump_cb(const google_breakpad::MinidumpDescriptor& descriptor, void* context, + bool succeeded); // The handle function when receiving SIGUSR1 signal. static void _usr1_sigaction(int signum, siginfo_t* info, void* context); - + // try clean old minidump files periodically. // To keep at most config::max_minidump_number files. void _clean_old_minidump(); diff --git a/be/src/runtime/odbc_table_sink.h b/be/src/runtime/odbc_table_sink.h index 3f9c8fd3b2..26c3b948bd 100644 --- a/be/src/runtime/odbc_table_sink.h +++ b/be/src/runtime/odbc_table_sink.h @@ -37,7 +37,7 @@ class ExprContext; class OdbcTableSink : public DataSink { public: OdbcTableSink(ObjectPool* pool, const RowDescriptor& row_desc, - const std::vector& t_exprs); + const std::vector& t_exprs); virtual ~OdbcTableSink(); @@ -55,9 +55,7 @@ public: // hosts. Further send() calls are illegal after calling close(). virtual Status close(RuntimeState* state, Status exec_status); - virtual RuntimeProfile* profile() { - return _profile; - } + virtual RuntimeProfile* profile() { return _profile; } private: ObjectPool* _pool; diff --git a/be/src/runtime/query_fragments_ctx.h b/be/src/runtime/query_fragments_ctx.h index 720cda4146..f8fa5fe10a 100644 --- a/be/src/runtime/query_fragments_ctx.h +++ b/be/src/runtime/query_fragments_ctx.h @@ -56,14 +56,11 @@ public: if (cpu_limit > 0) { // For now, cpu_limit will be the max concurrency of the scan thread pool token. _thread_token = _exec_env->limited_scan_thread_pool()->new_token( - ThreadPool::ExecutionMode::CONCURRENT, - cpu_limit); + ThreadPool::ExecutionMode::CONCURRENT, cpu_limit); } } - ThreadPoolToken* get_token() { - return _thread_token.get(); - } + ThreadPoolToken* get_token() { return _thread_token.get(); } public: TUniqueId query_id; @@ -84,6 +81,7 @@ public: std::atomic fragment_num; int timeout_second; ObjectPool obj_pool; + private: ExecEnv* _exec_env; DateTimeValue _start_time; @@ -96,5 +94,4 @@ private: std::unique_ptr _thread_token; }; -} // end of namespace - +} // namespace doris diff --git a/be/src/runtime/query_statistics.h b/be/src/runtime/query_statistics.h index 40feedcbc1..032023456d 100644 --- a/be/src/runtime/query_statistics.h +++ b/be/src/runtime/query_statistics.h @@ -50,7 +50,8 @@ private: // or plan's statistics and QueryStatisticsRecvr is responsible for collecting it. class QueryStatistics { public: - QueryStatistics() : scan_rows(0), scan_bytes(0), cpu_ms(0), returned_rows(0), max_peak_memory_bytes(0) {} + QueryStatistics() + : scan_rows(0), scan_bytes(0), cpu_ms(0), returned_rows(0), max_peak_memory_bytes(0) {} ~QueryStatistics(); void merge(const QueryStatistics& other); @@ -75,7 +76,9 @@ public: void set_returned_rows(int64_t num_rows) { this->returned_rows = num_rows; } - void set_max_peak_memory_bytes(int64_t max_peak_memory_bytes) { this->max_peak_memory_bytes = max_peak_memory_bytes; } + void set_max_peak_memory_bytes(int64_t max_peak_memory_bytes) { + this->max_peak_memory_bytes = max_peak_memory_bytes; + } void merge(QueryStatisticsRecvr* recvr); diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h index 3115f50efe..6c0e43cb69 100644 --- a/be/src/runtime/raw_value.h +++ b/be/src/runtime/raw_value.h @@ -91,7 +91,8 @@ public: static uint32_t zlib_crc32(const void* value, const TypeDescriptor& type, uint32_t seed); // Same as the up function, only use in vec exec engine. - static uint32_t zlib_crc32(const void* value, size_t len, const TypeDescriptor& type, uint32_t seed); + static uint32_t zlib_crc32(const void* value, size_t len, const TypeDescriptor& type, + uint32_t seed); // Compares both values. // Return value is < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2. @@ -408,7 +409,8 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, // NOTE: this is just for split data, decimal use old doris hash function // Because crc32 hardware is not equal with zlib crc32 -inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const TypeDescriptor& type, uint32_t seed) { +inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const TypeDescriptor& type, + uint32_t seed) { // Hash_combine with v = 0 if (v == nullptr) { uint32_t value = 0x9e3779b9; diff --git a/be/src/runtime/routine_load/data_consumer.cpp b/be/src/runtime/routine_load/data_consumer.cpp index 33934bbd1d..5573955fd1 100644 --- a/be/src/runtime/routine_load/data_consumer.cpp +++ b/be/src/runtime/routine_load/data_consumer.cpp @@ -225,7 +225,8 @@ Status KafkaDataConsumer::group_consume(BlockingQueue* queue, LOG(INFO) << "kafka consume timeout: " << _id; break; case RdKafka::ERR__TRANSPORT: - LOG(INFO) << "kafka consume Disconnected: " << _id << ", retry times: " << retry_times++; + LOG(INFO) << "kafka consume Disconnected: " << _id + << ", retry times: " << retry_times++; if (retry_times <= MAX_RETRY_TIMES_FOR_TRANSPORT_FAILURE) { std::this_thread::sleep_for(std::chrono::milliseconds(200)); break; diff --git a/be/src/runtime/routine_load/data_consumer.h b/be/src/runtime/routine_load/data_consumer.h index 17b1810083..173ffbd82a 100644 --- a/be/src/runtime/routine_load/data_consumer.h +++ b/be/src/runtime/routine_load/data_consumer.h @@ -144,10 +144,10 @@ public: Status get_partition_meta(std::vector* partition_ids); // get offsets for times Status get_offsets_for_times(const std::vector& times, - std::vector* offsets); + std::vector* offsets); // get latest offsets for partitions Status get_latest_offsets_for_partitions(const std::vector& partition_ids, - std::vector* offsets); + std::vector* offsets); private: std::string _brokers; diff --git a/be/src/runtime/routine_load/data_consumer_group.cpp b/be/src/runtime/routine_load/data_consumer_group.cpp index 7242fbe602..bdf3225a53 100644 --- a/be/src/runtime/routine_load/data_consumer_group.cpp +++ b/be/src/runtime/routine_load/data_consumer_group.cpp @@ -126,8 +126,8 @@ Status KafkaDataConsumerGroup::start_all(StreamLoadContext* ctx) { << ", left_time: " << left_time << ", left_rows: " << left_rows << ", left_bytes: " << left_bytes << ", blocking get time(us): " << _queue.total_get_wait_time() / 1000 - << ", blocking put time(us): " << _queue.total_put_wait_time() / 1000 - << ", " << ctx->brief(); + << ", blocking put time(us): " << _queue.total_put_wait_time() / 1000 << ", " + << ctx->brief(); // shutdown queue _queue.shutdown(); @@ -157,7 +157,7 @@ Status KafkaDataConsumerGroup::start_all(StreamLoadContext* ctx) { << ", len: " << msg->len(); Status st = (kafka_pipe.get()->*append_data)(static_cast(msg->payload()), - static_cast(msg->len())); + static_cast(msg->len())); if (st.ok()) { left_rows--; left_bytes -= msg->len(); diff --git a/be/src/runtime/routine_load/data_consumer_pool.cpp b/be/src/runtime/routine_load/data_consumer_pool.cpp index 5f076adc81..a134697c00 100644 --- a/be/src/runtime/routine_load/data_consumer_pool.cpp +++ b/be/src/runtime/routine_load/data_consumer_pool.cpp @@ -69,8 +69,7 @@ Status DataConsumerPool::get_consumer_grp(StreamLoadContext* ctx, DCHECK(ctx->kafka_info); if (ctx->kafka_info->begin_offset.size() == 0) { - return Status::InternalError( - "PAUSE: The size of begin_offset of task should not be 0."); + return Status::InternalError("PAUSE: The size of begin_offset of task should not be 0."); } std::shared_ptr grp = std::make_shared(); diff --git a/be/src/runtime/routine_load/routine_load_task_executor.cpp b/be/src/runtime/routine_load/routine_load_task_executor.cpp index e882f3eaac..076cbe33d6 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.cpp +++ b/be/src/runtime/routine_load/routine_load_task_executor.cpp @@ -105,8 +105,8 @@ Status RoutineLoadTaskExecutor::get_kafka_partition_meta(const PKafkaMetaProxyRe return st; } -Status RoutineLoadTaskExecutor::get_kafka_partition_offsets_for_times(const PKafkaMetaProxyRequest& request, - std::vector* partition_offsets) { +Status RoutineLoadTaskExecutor::get_kafka_partition_offsets_for_times( + const PKafkaMetaProxyRequest& request, std::vector* partition_offsets) { CHECK(request.has_kafka_info()); // This context is meaningless, just for unifing the interface @@ -125,8 +125,8 @@ Status RoutineLoadTaskExecutor::get_kafka_partition_offsets_for_times(const PKaf return st; } -Status RoutineLoadTaskExecutor::get_kafka_latest_offsets_for_partitions(const PKafkaMetaProxyRequest& request, - std::vector* partition_offsets) { +Status RoutineLoadTaskExecutor::get_kafka_latest_offsets_for_partitions( + const PKafkaMetaProxyRequest& request, std::vector* partition_offsets) { CHECK(request.has_kafka_info()); // This context is meaningless, just for unifing the interface @@ -136,10 +136,12 @@ Status RoutineLoadTaskExecutor::get_kafka_latest_offsets_for_partitions(const PK std::shared_ptr consumer; RETURN_IF_ERROR(_data_consumer_pool.get_consumer(&ctx, &consumer)); - Status st = std::static_pointer_cast(consumer)->get_latest_offsets_for_partitions( - std::vector(request.partition_id_for_latest_offsets().begin(), - request.partition_id_for_latest_offsets().end()), - partition_offsets); + Status st = + std::static_pointer_cast(consumer) + ->get_latest_offsets_for_partitions( + std::vector(request.partition_id_for_latest_offsets().begin(), + request.partition_id_for_latest_offsets().end()), + partition_offsets); if (st.ok()) { _data_consumer_pool.return_consumer(consumer); } @@ -159,7 +161,8 @@ Status RoutineLoadTaskExecutor::submit_task(const TRoutineLoadTask& task) { << ", job id: " << task.job_id << ", queue size: " << _thread_pool.get_queue_size() << ", current tasks num: " << _task_map.size(); - return Status::TooManyTasks(UniqueId(task.id).to_string() + "_" + BackendOptions::get_localhost()); + return Status::TooManyTasks(UniqueId(task.id).to_string() + "_" + + BackendOptions::get_localhost()); } // create the context @@ -341,7 +344,7 @@ void RoutineLoadTaskExecutor::exec_task(StreamLoadContext* ctx, DataConsumerPool _data_consumer_pool.return_consumer(consumer); // delete TopicPartition finally - Defer delete_tp{[&topic_partitions]() { + Defer delete_tp {[&topic_partitions]() { std::for_each(topic_partitions.begin(), topic_partitions.end(), [](RdKafka::TopicPartition* tp1) { delete tp1; }); }}; @@ -371,7 +374,7 @@ void RoutineLoadTaskExecutor::err_handler(StreamLoadContext* ctx, const Status& // for test only Status RoutineLoadTaskExecutor::_execute_plan_for_test(StreamLoadContext* ctx) { auto mock_consumer = [this, ctx]() { - ctx->ref(); + ctx->ref(); std::shared_ptr pipe = _exec_env->load_stream_mgr()->get(ctx->id); bool eof = false; std::stringstream ss; diff --git a/be/src/runtime/routine_load/routine_load_task_executor.h b/be/src/runtime/routine_load/routine_load_task_executor.h index 41f20b106e..6c391a6888 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.h +++ b/be/src/runtime/routine_load/routine_load_task_executor.h @@ -53,10 +53,10 @@ public: std::vector* partition_ids); Status get_kafka_partition_offsets_for_times(const PKafkaMetaProxyRequest& request, - std::vector* partition_offsets); + std::vector* partition_offsets); Status get_kafka_latest_offsets_for_partitions(const PKafkaMetaProxyRequest& request, - std::vector* partition_offsets); + std::vector* partition_offsets); private: // execute the task diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index 06306df512..f8727abe6b 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -173,7 +173,8 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch) CollectionValue* array_val = tuple->get_collection_slot(slot_collection->tuple_offset()); - CollectionValue::deserialize_collection(array_val, tuple_data, slot_collection->type()); + CollectionValue::deserialize_collection(array_val, tuple_data, + slot_collection->type()); } } } @@ -257,7 +258,7 @@ Status RowBatch::serialize(PRowBatch* output_batch, size_t* uncompressed_size, continue; } // Record offset before creating copy (which increments offset and tuple_data) - mutable_tuple_offsets->Add((int32_t) offset); + mutable_tuple_offsets->Add((int32_t)offset); mutable_new_tuple_offsets->Add(offset); row->get_tuple(j)->deep_copy(*desc, &tuple_data, &offset, /* convert_ptrs */ true); CHECK_LE(offset, size); diff --git a/be/src/runtime/row_batch_interface.hpp b/be/src/runtime/row_batch_interface.hpp index 24fd824529..3aa002a79b 100644 --- a/be/src/runtime/row_batch_interface.hpp +++ b/be/src/runtime/row_batch_interface.hpp @@ -16,7 +16,7 @@ // under the License. #ifndef DORIS_BE_SRC_QUERY_BE_RUNTIME_ROW_BATCH_INTERFACE_H -#define DORIS_BE_SRC_QUERY_BE_RUNTIME_ROW_BATCH_INTERFACE_H +#define DORIS_BE_SRC_QUERY_BE_RUNTIME_ROW_BATCH_INTERFACE_H namespace doris { @@ -25,6 +25,6 @@ public: virtual ~RowBatchInterface() {} }; -} +} // namespace doris #endif diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 008e9d2e58..e29609b9b3 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -138,8 +138,7 @@ Status RuntimeFilterMgr::get_merge_addr(TNetworkAddress* addr) { } Status RuntimeFilterMergeControllerEntity::_init_with_desc( - const TRuntimeFilterDesc* runtime_filter_desc, - const TQueryOptions* query_options, + const TRuntimeFilterDesc* runtime_filter_desc, const TQueryOptions* query_options, const std::vector* target_info, const int producer_size) { std::lock_guard guard(_filter_map_mutex); @@ -154,9 +153,11 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( std::string filter_id = std::to_string(runtime_filter_desc->filter_id); // LOG(INFO) << "entity filter id:" << filter_id; - cntVal->filter->init_with_desc(&cntVal->runtime_filter_desc, query_options, _fragment_instance_id); + cntVal->filter->init_with_desc(&cntVal->runtime_filter_desc, query_options, + _fragment_instance_id); cntVal->tracker = MemTracker::create_tracker( - -1, tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()->label() + ":FilterID:" + filter_id, + -1, + tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()->label() + ":FilterID:" + filter_id, tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()); _filter_map.emplace(filter_id, cntVal); return Status::OK(); @@ -179,7 +180,8 @@ Status RuntimeFilterMergeControllerEntity::init(UniqueId query_id, UniqueId frag if (build_iter == runtime_filter_params.runtime_filter_builder_num.end()) { return Status::InternalError("runtime filter params meet error"); } - _init_with_desc(&filterid_to_desc.second, &query_options, &target_iter->second, build_iter->second); + _init_with_desc(&filterid_to_desc.second, &query_options, &target_iter->second, + build_iter->second); } return Status::OK(); } @@ -290,7 +292,8 @@ Status RuntimeFilterMergeController::add_entity( _filter_controller_map[query_id_str] = *handle; const TRuntimeFilterParams& filter_params = params.params.runtime_filter_params; if (params.params.__isset.runtime_filter_params) { - RETURN_IF_ERROR(handle->get()->init(query_id, fragment_instance_id, filter_params, params.query_options)); + RETURN_IF_ERROR(handle->get()->init(query_id, fragment_instance_id, filter_params, + params.query_options)); } } else { *handle = _filter_controller_map[query_id_str].lock(); diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index e0d5d8104f..74e89873fd 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -205,7 +205,8 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { if (bytes_limit > MemTracker::get_process_tracker()->limit()) { VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) << " exceeds process memory limit of " - << PrettyPrinter::print(MemTracker::get_process_tracker()->limit(), TUnit::BYTES) + << PrettyPrinter::print(MemTracker::get_process_tracker()->limit(), + TUnit::BYTES) << ". Using process memory limit instead"; bytes_limit = MemTracker::get_process_tracker()->limit(); } @@ -213,18 +214,19 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { mem_tracker_counter->set(bytes_limit); if (query_type() == TQueryType::SELECT) { - _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_query_mem_tracker( - print_id(query_id), bytes_limit); + _query_mem_tracker = + _exec_env->task_pool_mem_tracker_registry()->register_query_mem_tracker( + print_id(query_id), bytes_limit); } else if (query_type() == TQueryType::LOAD) { _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_load_mem_tracker( - print_id(query_id), bytes_limit); + print_id(query_id), bytes_limit); } else { DCHECK(false); } - + _instance_mem_tracker = MemTracker::create_tracker( - bytes_limit, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker, - MemTrackerLevel::INSTANCE, &_profile); + bytes_limit, "RuntimeState:instance:" + print_id(_fragment_instance_id), + _query_mem_tracker, MemTrackerLevel::INSTANCE, &_profile); RETURN_IF_ERROR(init_buffer_poolstate()); @@ -379,7 +381,8 @@ Status RuntimeState::create_error_log_file() { return Status::OK(); } -Status RuntimeState::append_error_msg_to_file(std::function line, std::function error_msg, +Status RuntimeState::append_error_msg_to_file(std::function line, + std::function error_msg, bool* stop_processing, bool is_summary) { *stop_processing = false; if (query_type() != TQueryType::LOAD) { @@ -417,7 +420,8 @@ Status RuntimeState::append_error_msg_to_file(std::function line, // Note: export reason first in case src line too long and be truncated. fmt::format_to(out, "Reason: {}. src line [{}]; ", error_msg(), line()); } else if (_error_row_number == MAX_ERROR_NUM) { - fmt::format_to(out, "TOO MUCH ERROR! already reach {}. show no more next error.", MAX_ERROR_NUM); + fmt::format_to(out, "TOO MUCH ERROR! already reach {}. show no more next error.", + MAX_ERROR_NUM); } } diff --git a/be/src/runtime/small_file_mgr.cpp b/be/src/runtime/small_file_mgr.cpp index e7b842a347..6f88cacd70 100644 --- a/be/src/runtime/small_file_mgr.cpp +++ b/be/src/runtime/small_file_mgr.cpp @@ -20,7 +20,7 @@ #include #include -#include // boost::algorithm::starts_with +#include // boost::algorithm::starts_with #include #include "common/status.h" diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index 79fff7f02e..e8448a9c88 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -58,14 +58,15 @@ SnapshotLoader::SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id) _prop(std::map()), _storage_backend(nullptr) {} -SnapshotLoader::SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, const std::map& prop) +SnapshotLoader::SnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, + const std::map& prop) : _env(env), _job_id(job_id), _task_id(task_id), _broker_addr(TNetworkAddress()), _prop(prop) { - _storage_backend.reset(new S3StorageBackend(_prop)); - } + _storage_backend.reset(new S3StorageBackend(_prop)); +} SnapshotLoader::~SnapshotLoader() = default; @@ -155,7 +156,8 @@ Status SnapshotLoader::upload(const std::map& src_to_d // upload std::string full_remote_file = dest_path + "/" + local_file; std::string full_local_file = src_path + "/" + local_file; - RETURN_IF_ERROR(_storage_backend->upload_with_checksum(full_local_file, full_remote_file, md5sum)); + RETURN_IF_ERROR(_storage_backend->upload_with_checksum(full_local_file, + full_remote_file, md5sum)); } // end for each tablet's local files tablet_files->emplace(tablet_id, local_files_with_checksum); @@ -206,8 +208,9 @@ Status SnapshotLoader::download(const std::map& src_to int64_t remote_tablet_id; RETURN_IF_ERROR(_get_tablet_id_from_remote_path(remote_path, &remote_tablet_id)); - VLOG_CRITICAL << "get local tablet id: " << local_tablet_id << ", schema hash: " << schema_hash - << ", remote tablet id: " << remote_tablet_id; + VLOG_CRITICAL << "get local tablet id: " << local_tablet_id + << ", schema hash: " << schema_hash + << ", remote tablet id: " << remote_tablet_id; // 2.1. get local files std::vector local_files; @@ -223,7 +226,8 @@ Status SnapshotLoader::download(const std::map& src_to return Status::InternalError(ss.str()); } - TabletSharedPtr tablet = _env->storage_engine()->tablet_manager()->get_tablet(local_tablet_id); + TabletSharedPtr tablet = + _env->storage_engine()->tablet_manager()->get_tablet(local_tablet_id); if (tablet == nullptr) { std::stringstream ss; ss << "failed to get local tablet: " << local_tablet_id; @@ -257,7 +261,7 @@ Status SnapshotLoader::download(const std::map& src_to need_download = true; } else { VLOG_CRITICAL << "get local file checksum: " << remote_file << ": " - << local_md5sum; + << local_md5sum; if (file_stat.md5 != local_md5sum) { // file's checksum does not equal, download it. need_download = true; @@ -295,12 +299,13 @@ Status SnapshotLoader::download(const std::map& src_to status = FileUtils::md5sum(full_local_file, &downloaded_md5sum); if (!status.ok()) { std::stringstream ss; - ss << "failed to get md5sum of file: " << full_local_file << ", err: " << status.get_error_msg(); + ss << "failed to get md5sum of file: " << full_local_file + << ", err: " << status.get_error_msg(); LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } VLOG_CRITICAL << "get downloaded file checksum: " << full_local_file << ": " - << downloaded_md5sum; + << downloaded_md5sum; if (downloaded_md5sum != file_stat.md5) { std::stringstream ss; ss << "invalid md5 of downloaded file: " << full_local_file @@ -335,7 +340,7 @@ Status SnapshotLoader::download(const std::map& src_to // delete std::string full_local_file = local_path + "/" + local_file; VLOG_CRITICAL << "begin to delete local snapshot file: " << full_local_file - << ", it does not exist in remote"; + << ", it does not exist in remote"; if (remove(full_local_file.c_str()) != 0) { LOG(WARNING) << "failed to delete unknown local file: " << full_local_file << ", ignore it"; @@ -514,7 +519,7 @@ Status SnapshotLoader::_get_tablet_id_and_schema_hash_from_file_path(const std:: ss2 >> *tablet_id; VLOG_CRITICAL << "get tablet id " << *tablet_id << ", schema hash: " << *schema_hash - << " from path: " << src_path; + << " from path: " << src_path; return Status::OK(); } diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc index 5bf518178d..5f4be7f2bf 100644 --- a/be/src/runtime/sorted_run_merger.cc +++ b/be/src/runtime/sorted_run_merger.cc @@ -128,8 +128,8 @@ public: Status init(bool* done) override { *done = false; _pull_task_thread = - std::thread(&SortedRunMerger::ParallelBatchedRowSupplier::process_sorted_run_task, - this, tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()); + std::thread(&SortedRunMerger::ParallelBatchedRowSupplier::process_sorted_run_task, + this, tls_ctx()->_thread_mem_tracker_mgr->mem_tracker()); RETURN_IF_ERROR(next(nullptr, done)); return Status::OK(); diff --git a/be/src/runtime/spill_sorter.cc b/be/src/runtime/spill_sorter.cc index efd0692585..faab00f4d6 100644 --- a/be/src/runtime/spill_sorter.cc +++ b/be/src/runtime/spill_sorter.cc @@ -164,9 +164,7 @@ private: const vector& var_values); // Returns true if we have var-len slots and there are var-len blocks. - bool has_var_len_blocks() const { - return _has_var_len_slots && !_var_len_blocks.empty(); - } + bool has_var_len_blocks() const { return _has_var_len_slots && !_var_len_blocks.empty(); } // Parent sorter object. const SpillSorter* _sorter; diff --git a/be/src/runtime/stream_load/stream_load_context.h b/be/src/runtime/stream_load/stream_load_context.h index f4018e4e1a..e19f3c3675 100644 --- a/be/src/runtime/stream_load/stream_load_context.h +++ b/be/src/runtime/stream_load/stream_load_context.h @@ -98,7 +98,8 @@ public: std::string to_json() const; std::string prepare_stream_load_record(const std::string& stream_load_record); - static void parse_stream_load_record(const std::string& stream_load_record, TStreamLoadRecord& stream_load_item); + static void parse_stream_load_record(const std::string& stream_load_record, + TStreamLoadRecord& stream_load_item); // the old mini load result format is not same as stream load. // add this function for compatible with old mini load result format. diff --git a/be/src/runtime/stream_load/stream_load_executor.cpp b/be/src/runtime/stream_load/stream_load_executor.cpp index f93d7c97d7..4f080318b8 100644 --- a/be/src/runtime/stream_load/stream_load_executor.cpp +++ b/be/src/runtime/stream_load/stream_load_executor.cpp @@ -57,8 +57,9 @@ Status StreamLoadExecutor::execute_plan_fragment(StreamLoadContext* ctx) { int64_t num_selected_rows = ctx->number_total_rows - ctx->number_unselected_rows; - if (num_selected_rows > 0 && (double)ctx->number_filtered_rows / num_selected_rows > - ctx->max_filter_ratio) { + if (num_selected_rows > 0 && + (double)ctx->number_filtered_rows / num_selected_rows > + ctx->max_filter_ratio) { // NOTE: Do not modify the error message here, for historical reasons, // some users may rely on this error message. status = Status::InternalError("too many filtered rows"); @@ -175,7 +176,6 @@ Status StreamLoadExecutor::begin_txn(StreamLoadContext* ctx) { } Status StreamLoadExecutor::pre_commit_txn(StreamLoadContext* ctx) { - TLoadTxnCommitRequest request; get_commit_request(ctx, request); @@ -185,7 +185,7 @@ Status StreamLoadExecutor::pre_commit_txn(StreamLoadContext* ctx) { RETURN_IF_ERROR(ThriftRpcHelper::rpc( master_addr.hostname, master_addr.port, [&request, &result](FrontendServiceConnection& client) { - client->loadTxnPreCommit(result, request); + client->loadTxnPreCommit(result, request); }, config::txn_commit_rpc_timeout_ms)); #else @@ -221,7 +221,7 @@ Status StreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) { RETURN_IF_ERROR(ThriftRpcHelper::rpc( master_addr.hostname, master_addr.port, [&request, &result](FrontendServiceConnection& client) { - client->loadTxn2PC(result, request); + client->loadTxn2PC(result, request); }, config::txn_commit_rpc_timeout_ms)); Status status(result.status); @@ -232,7 +232,8 @@ Status StreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) { return Status::OK(); } -void StreamLoadExecutor::get_commit_request(StreamLoadContext* ctx, TLoadTxnCommitRequest& request) { +void StreamLoadExecutor::get_commit_request(StreamLoadContext* ctx, + TLoadTxnCommitRequest& request) { set_request_auth(&request, ctx->auth); request.db = ctx->db; if (ctx->db_id > 0) { diff --git a/be/src/runtime/stream_load/stream_load_pipe.h b/be/src/runtime/stream_load/stream_load_pipe.h index d5d3006aed..7872cab1e2 100644 --- a/be/src/runtime/stream_load/stream_load_pipe.h +++ b/be/src/runtime/stream_load/stream_load_pipe.h @@ -268,7 +268,6 @@ private: std::condition_variable _put_cond; std::condition_variable _get_cond; - ByteBufferPtr _write_buf; }; diff --git a/be/src/runtime/stream_load/stream_load_recorder.cpp b/be/src/runtime/stream_load/stream_load_recorder.cpp index 7029bc50af..83ed85d234 100644 --- a/be/src/runtime/stream_load/stream_load_recorder.cpp +++ b/be/src/runtime/stream_load/stream_load_recorder.cpp @@ -27,15 +27,11 @@ #include "rocksdb/utilities/db_ttl.h" #include "util/time.h" - namespace doris { const std::string STREAM_LOAD_POSTFIX = "/stream_load"; StreamLoadRecorder::StreamLoadRecorder(const std::string& root_path) - : _root_path(root_path), - _db(nullptr), - _last_compaction_time(UnixMillis()) { -} + : _root_path(root_path), _db(nullptr), _last_compaction_time(UnixMillis()) {} StreamLoadRecorder::~StreamLoadRecorder() { if (_db != nullptr) { @@ -59,10 +55,12 @@ Status StreamLoadRecorder::init() { // default column family is required column_families.emplace_back(DEFAULT_COLUMN_FAMILY, rocksdb::ColumnFamilyOptions()); std::vector ttls = {config::stream_load_record_expire_time_secs}; - rocksdb::Status s = rocksdb::DBWithTTL::Open(options, db_path, column_families, &_handles, &_db, ttls); + rocksdb::Status s = + rocksdb::DBWithTTL::Open(options, db_path, column_families, &_handles, &_db, ttls); if (!s.ok() || _db == nullptr) { LOG(WARNING) << "rocks db open failed, reason:" << s.ToString(); - return Status::InternalError("Stream load record rocksdb open failed, reason: " + s.ToString()); + return Status::InternalError("Stream load record rocksdb open failed, reason: " + + s.ToString()); } return Status::OK(); } @@ -74,10 +72,12 @@ Status StreamLoadRecorder::put(const std::string& key, const std::string& value) rocksdb::Status s = _db->Put(write_options, handle, rocksdb::Slice(key), rocksdb::Slice(value)); if (!s.ok()) { LOG(WARNING) << "rocks db put key:" << key << " failed, reason:" << s.ToString(); - return Status::InternalError("Stream load record rocksdb put failed, reason: " + s.ToString()); + return Status::InternalError("Stream load record rocksdb put failed, reason: " + + s.ToString()); } - if ((UnixMillis() - _last_compaction_time) / 1000 > config::clean_stream_load_record_interval_secs) { + if ((UnixMillis() - _last_compaction_time) / 1000 > + config::clean_stream_load_record_interval_secs) { rocksdb::CompactRangeOptions options; s = _db->CompactRange(options, _handles[0], nullptr, nullptr); if (s.ok()) { @@ -87,7 +87,8 @@ Status StreamLoadRecorder::put(const std::string& key, const std::string& value) return Status::OK(); } -Status StreamLoadRecorder::get_batch(const std::string& start, const int batch_size, std::map* stream_load_records) { +Status StreamLoadRecorder::get_batch(const std::string& start, const int batch_size, + std::map* stream_load_records) { rocksdb::ColumnFamilyHandle* handle = _handles[0]; std::unique_ptr it(_db->NewIterator(rocksdb::ReadOptions(), handle)); if (start == "-1") { diff --git a/be/src/runtime/string_buffer.hpp b/be/src/runtime/string_buffer.hpp index 3e5fbada0d..d2dafeba68 100644 --- a/be/src/runtime/string_buffer.hpp +++ b/be/src/runtime/string_buffer.hpp @@ -33,22 +33,17 @@ class StringBuffer { public: // C'tor for StringBuffer. Memory backing the string will be allocated from // the pool as necessary. Can optionally be initialized from a StringValue. - StringBuffer(MemPool* pool, StringValue* str) : - _pool(pool), - _buffer_size(0) { + StringBuffer(MemPool* pool, StringValue* str) : _pool(pool), _buffer_size(0) { if (str != NULL) { _string_value = *str; _buffer_size = str->len; } } - StringBuffer(MemPool* pool) : - _pool(pool), - _buffer_size(0) { - } + StringBuffer(MemPool* pool) : _pool(pool), _buffer_size(0) {} virtual ~StringBuffer() {} - + // append 'str' to the current string, allocating a new buffer as necessary. void append(const char* str, int len) { int new_len = len + _string_value.len; @@ -62,9 +57,7 @@ public: } // TODO: switch everything to uint8_t? - void append(const uint8_t* str, int len) { - append(reinterpret_cast(str), len); - } + void append(const uint8_t* str, int len) { append(reinterpret_cast(str), len); } // Assigns contents to StringBuffer void assign(const char* str, int len) { @@ -73,9 +66,7 @@ public: } // clear the underlying StringValue. The allocated buffer can be reused. - void clear() { - _string_value.len = 0; - } + void clear() { _string_value.len = 0; } // Clears the underlying buffer and StringValue void reset() { @@ -84,24 +75,16 @@ public: } // Returns whether the current string is empty - bool empty() const { - return _string_value.len == 0; - } + bool empty() const { return _string_value.len == 0; } // Returns the length of the current string - int size() const { - return _string_value.len; - } + int size() const { return _string_value.len; } // Returns the underlying StringValue - const StringValue& str() const { - return _string_value; - } + const StringValue& str() const { return _string_value; } // Returns the buffer size - int buffer_size() const { - return _buffer_size; - } + int buffer_size() const { return _buffer_size; } private: // Grows the buffer backing the string to be at least new_size, copying @@ -123,6 +106,6 @@ private: int _buffer_size; }; -} +} // namespace doris #endif diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h index 16489d191b..43e652fb28 100644 --- a/be/src/runtime/string_value.h +++ b/be/src/runtime/string_value.h @@ -50,9 +50,8 @@ static inline int string_compare(const char* s1, int64_t n1, const char* s2, int while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { __m128i xmm0 = _mm_loadu_si128(reinterpret_cast(s1)); __m128i xmm1 = _mm_loadu_si128(reinterpret_cast(s2)); - int chars_match = - _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, - sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); + int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, + sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) { return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match]; } @@ -193,9 +192,7 @@ struct StringValue { }; struct HashOfStringValue { - size_t operator()(const StringValue& v) const { - return HashUtil::hash(v.ptr, v.len, 0); - } + size_t operator()(const StringValue& v) const { return HashUtil::hash(v.ptr, v.len, 0); } }; }; diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index e8ad30d83f..d74b03342a 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -31,7 +31,11 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(tablet_writer_count, MetricUnit::NOUNIT); std::atomic TabletsChannel::_s_tablet_writer_count; TabletsChannel::TabletsChannel(const TabletsChannelKey& key, bool is_high_priority, bool is_vec) - : _key(key), _state(kInitialized), _closed_senders(64), _is_high_priority(is_high_priority), _is_vec(is_vec) { + : _key(key), + _state(kInitialized), + _closed_senders(64), + _is_high_priority(is_high_priority), + _is_vec(is_vec) { _mem_tracker = MemTracker::create_tracker(-1, "TabletsChannel:" + std::to_string(key.index_id)); static std::once_flag once_flag; std::call_once(once_flag, [] { @@ -124,7 +128,8 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, for (auto writer : need_wait_writers) { // close may return failed, but no need to handle it here. // tablet_vec will only contains success tablet, and then let FE judge it. - writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); + writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != + _broken_tablets.end())); } } return Status::OK(); @@ -144,10 +149,9 @@ Status TabletsChannel::reduce_mem_usage(int64_t mem_limit) { for (auto& it : _tablet_writers) { writers.push_back(it.second); } - std::sort(writers.begin(), writers.end(), - [](const DeltaWriter* lhs, const DeltaWriter* rhs) { - return lhs->mem_consumption() > rhs->mem_consumption(); - }); + std::sort(writers.begin(), writers.end(), [](const DeltaWriter* lhs, const DeltaWriter* rhs) { + return lhs->mem_consumption() > rhs->mem_consumption(); + }); // Decide which writes should be flushed to reduce mem consumption. // The main idea is to flush at least one third of the mem_limit. @@ -161,7 +165,7 @@ Status TabletsChannel::reduce_mem_usage(int64_t mem_limit) { // the tablet that has not been flushed before will accumulate more data, thereby reducing the number of flushes. int64_t mem_to_flushed = mem_limit / 3; int counter = 0; - int64_t sum = 0; + int64_t sum = 0; for (auto writer : writers) { if (writer->mem_consumption() <= 0) { break; @@ -180,7 +184,8 @@ Status TabletsChannel::reduce_mem_usage(int64_t mem_limit) { for (int i = 0; i < counter; i++) { Status st = writers[i]->wait_flush(); if (!st.ok()) { - return Status::InternalError(fmt::format("failed to reduce mem consumption by flushing memtable. err: {}", st)); + return Status::InternalError(fmt::format( + "failed to reduce mem consumption by flushing memtable. err: {}", st)); } } return Status::OK(); @@ -254,4 +259,4 @@ std::ostream& operator<<(std::ostream& os, const TabletsChannelKey& key) { return os; } -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h index 1d0fe3c5fa..725fe44083 100644 --- a/be/src/runtime/tablets_channel.h +++ b/be/src/runtime/tablets_channel.h @@ -91,7 +91,7 @@ public: int64_t mem_consumption() const { return _mem_tracker->consumption(); } private: - template + template Status _get_current_seq(int64_t& cur_seq, const Request& request); // open all writer @@ -145,20 +145,20 @@ private: bool _is_vec = false; }; -template +template Status TabletsChannel::_get_current_seq(int64_t& cur_seq, const Request& request) { std::lock_guard l(_lock); if (_state != kOpened) { return _state == kFinished - ? _close_status - : Status::InternalError(strings::Substitute("TabletsChannel $0 state: $1", - _key.to_string(), _state)); + ? _close_status + : Status::InternalError(strings::Substitute("TabletsChannel $0 state: $1", + _key.to_string(), _state)); } cur_seq = _next_seqs[request.sender_id()]; // check packet if (request.packet_seq() > cur_seq) { LOG(WARNING) << "lost data packet, expect_seq=" << cur_seq - << ", recept_seq=" << request.packet_seq(); + << ", recept_seq=" << request.packet_seq(); return Status::InternalError("lost data packet"); } return Status::OK(); @@ -190,13 +190,13 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request, } auto it = tablet_to_rowidxs.find(tablet_id); if (it == tablet_to_rowidxs.end()) { - tablet_to_rowidxs.emplace(tablet_id, std::initializer_list{ i }); + tablet_to_rowidxs.emplace(tablet_id, std::initializer_list {i}); } else { it->second.emplace_back(i); } } - auto get_send_data = [&] () { + auto get_send_data = [&]() { if constexpr (std::is_same_v) { return RowBatch(*_row_desc, request.row_batch()); } else { @@ -205,12 +205,13 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request, }; auto send_data = get_send_data(); - google::protobuf::RepeatedPtrField* tablet_errors = response->mutable_tablet_errors(); + google::protobuf::RepeatedPtrField* tablet_errors = + response->mutable_tablet_errors(); for (const auto& tablet_to_rowidxs_it : tablet_to_rowidxs) { auto tablet_writer_it = _tablet_writers.find(tablet_to_rowidxs_it.first); if (tablet_writer_it == _tablet_writers.end()) { - return Status::InternalError( - strings::Substitute("unknown tablet to append data, tablet=$0", tablet_to_rowidxs_it.first)); + return Status::InternalError(strings::Substitute( + "unknown tablet to append data, tablet=$0", tablet_to_rowidxs_it.first)); } Status st = tablet_writer_it->second->write(&send_data, tablet_to_rowidxs_it.second); @@ -234,4 +235,4 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request, } return Status::OK(); } -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp index 0b71101d97..d1f206dedc 100644 --- a/be/src/runtime/thread_context.cpp +++ b/be/src/runtime/thread_context.cpp @@ -149,7 +149,7 @@ SwitchBthread::~SwitchBthread() { DCHECK(tls != nullptr); tls->_thread_mem_tracker_mgr->clear_untracked_mems(); #ifndef NDEBUG - DorisMetrics::instance()->switch_bthread_count->increment(1); + DorisMetrics::instance()->switch_bthread_count->increment(1); #endif } diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index 1640f14464..92ee49ac0b 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -36,11 +36,9 @@ namespace doris { -static void deep_copy_collection_slots( - Tuple* shallow_copied_tuple, - const TupleDescriptor& desc, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs); +static void deep_copy_collection_slots(Tuple* shallow_copied_tuple, const TupleDescriptor& desc, + const GenMemFootprintFunc& gen_mem_footprint, + bool convert_ptrs); int64_t Tuple::total_byte_size(const TupleDescriptor& desc) const { int64_t result = desc.byte_size(); @@ -93,23 +91,22 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo } // copy collection slot - deep_copy_collection_slots(dst, desc, [pool](int size) ->MemFootprint { - int64_t offset = pool->total_allocated_bytes(); - uint8_t* data = pool->allocate(size); - return { offset, data }; - }, - convert_ptrs - ); + deep_copy_collection_slots( + dst, desc, + [pool](int size) -> MemFootprint { + int64_t offset = pool->total_allocated_bytes(); + uint8_t* data = pool->allocate(size); + return {offset, data}; + }, + convert_ptrs); } // Deep copy collection slots. // NOTICE: The Tuple* shallow_copied_tuple must be initialized by calling memcpy function first ( // copy data from origin tuple). -static void deep_copy_collection_slots( - Tuple* shallow_copied_tuple, - const TupleDescriptor& desc, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs) { +static void deep_copy_collection_slots(Tuple* shallow_copied_tuple, const TupleDescriptor& desc, + const GenMemFootprintFunc& gen_mem_footprint, + bool convert_ptrs) { for (auto slot_desc : desc.collection_slots()) { DCHECK(slot_desc->type().is_collection_type()); if (shallow_copied_tuple->is_null(slot_desc->null_indicator_offset())) { @@ -118,8 +115,8 @@ static void deep_copy_collection_slots( // copy collection item CollectionValue* cv = shallow_copied_tuple->get_collection_slot(slot_desc->tuple_offset()); - CollectionValue::deep_copy_collection( - cv, slot_desc->type().children[0], gen_mem_footprint, convert_ptrs); + CollectionValue::deep_copy_collection(cv, slot_desc->type().children[0], gen_mem_footprint, + convert_ptrs); } } @@ -165,7 +162,8 @@ int64_t Tuple::release_string(const TupleDescriptor& desc) { return bytes; } -void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, bool convert_ptrs) { +void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, + bool convert_ptrs) { Tuple* dst = (Tuple*)(*data); memory_copy(dst, this, desc.byte_size()); *data += desc.byte_size(); @@ -186,14 +184,15 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, } // copy collection slots - deep_copy_collection_slots(dst, desc, [offset, data](int size) -> MemFootprint { - MemFootprint footprint = { *offset, reinterpret_cast(*data) }; - *offset += size; - *data += size; - return footprint; - }, - convert_ptrs - ); + deep_copy_collection_slots( + dst, desc, + [offset, data](int size) -> MemFootprint { + MemFootprint footprint = {*offset, reinterpret_cast(*data)}; + *offset += size; + *data += size; + return footprint; + }, + convert_ptrs); } template diff --git a/be/src/runtime/type_limit.h b/be/src/runtime/type_limit.h index 45096b5b4c..3e6a38715f 100644 --- a/be/src/runtime/type_limit.h +++ b/be/src/runtime/type_limit.h @@ -26,12 +26,8 @@ namespace doris { template struct type_limit { - static T min() { - return std::numeric_limits::lowest(); - } - static T max() { - return std::numeric_limits::max(); - } + static T min() { return std::numeric_limits::lowest(); } + static T max() { return std::numeric_limits::max(); } }; template <> diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index 10a38404aa..33786d2cb0 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -18,7 +18,7 @@ #include "runtime/user_function_cache.h" #include -#include // boost::algorithm::ends_with +#include // boost::algorithm::ends_with #include #include diff --git a/be/src/runtime/user_function_cache.h b/be/src/runtime/user_function_cache.h index a9b68e5e8b..256a13c8c0 100644 --- a/be/src/runtime/user_function_cache.h +++ b/be/src/runtime/user_function_cache.h @@ -41,10 +41,7 @@ struct UserFunctionCacheEntry; // with id, this function library is valid. And when user wants to // change its implementation(URL), Doris will generate a new function // id. -enum class LibType { - JAR, - SO -}; +enum class LibType { JAR, SO }; class UserFunctionCache { public: @@ -70,7 +67,8 @@ public: UserFunctionCacheEntry** entry); void release_entry(UserFunctionCacheEntry* entry); - Status get_jarpath(int64_t fid, const std::string& url, const std::string& checksum, std::string* libpath); + Status get_jarpath(int64_t fid, const std::string& url, const std::string& checksum, + std::string* libpath); private: Status _load_cached_lib(); diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 5708d24d10..1bcdfdb9d3 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -101,66 +101,61 @@ enum class InstructionFail { ARM_NEON = 9 }; -auto instruction_fail_to_string(InstructionFail fail) -{ - switch (fail) - { +auto instruction_fail_to_string(InstructionFail fail) { + switch (fail) { #define ret(x) return std::make_tuple(STDERR_FILENO, x, ARRAY_SIZE(x) - 1) - case InstructionFail::NONE: - ret("NONE"); - case InstructionFail::SSE3: - ret("SSE3"); - case InstructionFail::SSSE3: - ret("SSSE3"); - case InstructionFail::SSE4_1: - ret("SSE4.1"); - case InstructionFail::SSE4_2: - ret("SSE4.2"); - case InstructionFail::POPCNT: - ret("POPCNT"); - case InstructionFail::AVX: - ret("AVX"); - case InstructionFail::AVX2: - ret("AVX2"); - case InstructionFail::AVX512: - ret("AVX512"); - case InstructionFail::ARM_NEON: - ret("ARM_NEON"); + case InstructionFail::NONE: + ret("NONE"); + case InstructionFail::SSE3: + ret("SSE3"); + case InstructionFail::SSSE3: + ret("SSSE3"); + case InstructionFail::SSE4_1: + ret("SSE4.1"); + case InstructionFail::SSE4_2: + ret("SSE4.2"); + case InstructionFail::POPCNT: + ret("POPCNT"); + case InstructionFail::AVX: + ret("AVX"); + case InstructionFail::AVX2: + ret("AVX2"); + case InstructionFail::AVX512: + ret("AVX512"); + case InstructionFail::ARM_NEON: + ret("ARM_NEON"); } __builtin_unreachable(); } - sigjmp_buf jmpbuf; -void sig_ill_check_handler(int, siginfo_t *, void *) -{ +void sig_ill_check_handler(int, siginfo_t*, void*) { siglongjmp(jmpbuf, 1); } /// Check if necessary SSE extensions are available by trying to execute some sse instructions. /// If instruction is unavailable, SIGILL will be sent by kernel. -void check_required_instructions_impl(volatile InstructionFail & fail) -{ +void check_required_instructions_impl(volatile InstructionFail& fail) { #if defined(__SSE3__) fail = InstructionFail::SSE3; - __asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); + __asm__ volatile("addsubpd %%xmm0, %%xmm0" : : : "xmm0"); #endif #if defined(__SSSE3__) fail = InstructionFail::SSSE3; - __asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0"); + __asm__ volatile("pabsw %%xmm0, %%xmm0" : : : "xmm0"); #endif #if defined(__SSE4_1__) fail = InstructionFail::SSE4_1; - __asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); + __asm__ volatile("pmaxud %%xmm0, %%xmm0" : : : "xmm0"); #endif #if defined(__SSE4_2__) fail = InstructionFail::SSE4_2; - __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); + __asm__ volatile("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); #endif /// Defined by -msse4.2 @@ -169,47 +164,42 @@ void check_required_instructions_impl(volatile InstructionFail & fail) { uint64_t a = 0; uint64_t b = 0; - __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + __asm__ volatile("popcnt %1, %0" : "=r"(a) : "r"(b) :); } #endif #if defined(__AVX__) fail = InstructionFail::AVX; - __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); + __asm__ volatile("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); #endif #if defined(__AVX2__) fail = InstructionFail::AVX2; - __asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); + __asm__ volatile("vpabsw %%ymm0, %%ymm0" : : : "ymm0"); #endif #if defined(__AVX512__) fail = InstructionFail::AVX512; - __asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); + __asm__ volatile("vpabsw %%zmm0, %%zmm0" : : : "zmm0"); #endif #if defined(__ARM_NEON__) fail = InstructionFail::ARM_NEON; - __asm__ volatile ("vadd.i32 q8, q8, q8" : : : "q8"); + __asm__ volatile("vadd.i32 q8, q8, q8" : : : "q8"); #endif fail = InstructionFail::NONE; } -bool write_retry(int fd, const char * data, size_t size) -{ - if (!size) - size = strlen(data); +bool write_retry(int fd, const char* data, size_t size) { + if (!size) size = strlen(data); - while (size != 0) - { + while (size != 0) { ssize_t res = ::write(fd, data, size); - if ((-1 == res || 0 == res) && errno != EINTR) - return false; + if ((-1 == res || 0 == res) && errno != EINTR) return false; - if (res > 0) - { + if (res > 0) { data += res; size -= res; } @@ -219,26 +209,22 @@ bool write_retry(int fd, const char * data, size_t size) } /// Macros to avoid using strlen(), since it may fail if SSE is not supported. -#define WRITE_ERROR(data) do \ - { \ - static_assert(__builtin_constant_p(data)); \ - if (!write_retry(STDERR_FILENO, data, ARRAY_SIZE(data) - 1)) \ - _Exit(1); \ +#define WRITE_ERROR(data) \ + do { \ + static_assert(__builtin_constant_p(data)); \ + if (!write_retry(STDERR_FILENO, data, ARRAY_SIZE(data) - 1)) _Exit(1); \ } while (false) /// Check SSE and others instructions availability. Calls exit on fail. /// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions. -void check_required_instructions() -{ - struct sigaction sa{}; - struct sigaction sa_old{}; +void check_required_instructions() { + struct sigaction sa {}; + struct sigaction sa_old {}; sa.sa_sigaction = sig_ill_check_handler; sa.sa_flags = SA_SIGINFO; auto signal = SIGILL; - if (sigemptyset(&sa.sa_mask) != 0 - || sigaddset(&sa.sa_mask, signal) != 0 - || sigaction(signal, &sa, &sa_old) != 0) - { + if (sigemptyset(&sa.sa_mask) != 0 || sigaddset(&sa.sa_mask, signal) != 0 || + sigaction(signal, &sa, &sa_old) != 0) { /// You may wonder about strlen. /// Typical implementation of strlen is using SSE4.2 or AVX2. /// But this is not the case because it's compiler builtin and is executed at compile time. @@ -249,36 +235,31 @@ void check_required_instructions() volatile InstructionFail fail = InstructionFail::NONE; - if (sigsetjmp(jmpbuf, 1)) - { + if (sigsetjmp(jmpbuf, 1)) { WRITE_ERROR("Instruction check fail. The CPU does not support "); - if (!std::apply(write_retry, instruction_fail_to_string(fail))) - _Exit(1); + if (!std::apply(write_retry, instruction_fail_to_string(fail))) _Exit(1); WRITE_ERROR(" instruction set.\n"); - WRITE_ERROR("For example, if your CPU does not support AVX2, you need to rebuild the Doris BE with: USE_AVX2=0 sh build.sh --be"); + WRITE_ERROR( + "For example, if your CPU does not support AVX2, you need to rebuild the Doris BE " + "with: USE_AVX2=0 sh build.sh --be"); _Exit(1); } check_required_instructions_impl(fail); - if (sigaction(signal, &sa_old, nullptr)) - { + if (sigaction(signal, &sa_old, nullptr)) { WRITE_ERROR("Can not set signal handler\n"); _Exit(1); } } -struct Checker -{ - Checker() - { - check_required_instructions(); - } +struct Checker { + Checker() { check_required_instructions(); } } checker #ifndef __APPLE__ - __attribute__((init_priority(101))) /// Run before other static initializers. + __attribute__((init_priority(101))) /// Run before other static initializers. #endif -; + ; int main(int argc, char** argv) { doris::signal::InstallFailureSignalHandler(); @@ -469,7 +450,8 @@ int main(int argc, char** argv) { status = heartbeat_thrift_server->start(); if (!status.ok()) { - LOG(ERROR) << "Doris BE HeartBeat Service did not start correctly, exiting: " << status.get_error_msg(); + LOG(ERROR) << "Doris BE HeartBeat Service did not start correctly, exiting: " + << status.get_error_msg(); doris::shutdown_logging(); exit(1); } @@ -501,8 +483,8 @@ int main(int argc, char** argv) { #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) doris::MemInfo::refresh_current_mem(); #endif - // TODO(zxy) 10s is too long to clear the expired task mem tracker. - // It should be actively triggered at the end of query/load. + // TODO(zxy) 10s is too long to clear the expired task mem tracker. + // It should be actively triggered at the end of query/load. doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker(); sleep(10); } @@ -534,4 +516,3 @@ static void help(const char* progname) { printf(" -v, --version output version information, then exit\n"); printf(" -?, --help show this help, then exit\n"); } - diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 2d3b9daba3..3ce77f6bb6 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -66,7 +66,6 @@ Status HttpService::start() { _ev_http_server->register_handler(HttpMethod::PUT, "/api/{db}/_stream_load_2pc", streamload_2pc_action); - // register download action std::vector allow_paths; for (auto& path : _env->store_paths()) { @@ -121,8 +120,7 @@ Status HttpService::start() { } MetaAction* meta_action = _pool.add(new MetaAction(HEADER)); - _ev_http_server->register_handler(HttpMethod::GET, "/api/meta/header/{tablet_id}", - meta_action); + _ev_http_server->register_handler(HttpMethod::GET, "/api/meta/header/{tablet_id}", meta_action); #ifndef BE_TEST // Register BE checksum action diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp index 764f5b5f19..1ff2b5aa4e 100644 --- a/be/src/util/binary_cast.hpp +++ b/be/src/util/binary_cast.hpp @@ -71,14 +71,17 @@ To binary_cast(From from) { constexpr bool from_decv2_to_packed128 = match_v; constexpr bool from_i128_to_dt = match_v; constexpr bool from_dt_to_i128 = match_v; - constexpr bool from_i64_to_vec_dt = match_v; - constexpr bool from_vec_dt_to_i64 = match_v; + constexpr bool from_i64_to_vec_dt = + match_v; + constexpr bool from_vec_dt_to_i64 = + match_v; constexpr bool from_i128_to_decv2 = match_v; constexpr bool from_decv2_to_i128 = match_v; static_assert(from_u64_to_db || from_i64_to_db || from_db_to_i64 || from_db_to_u64 || - from_decv2_to_packed128 || from_i128_to_dt || from_dt_to_i128 || from_i64_to_vec_dt || from_vec_dt_to_i64 || - from_i128_to_decv2 || from_decv2_to_i128); + from_decv2_to_packed128 || from_i128_to_dt || from_dt_to_i128 || + from_i64_to_vec_dt || from_vec_dt_to_i64 || from_i128_to_decv2 || + from_decv2_to_i128); if constexpr (from_u64_to_db) { TypeConverter conv; diff --git a/be/src/util/bit_util.h b/be/src/util/bit_util.h index dabf87ee0f..ae10149ac9 100644 --- a/be/src/util/bit_util.h +++ b/be/src/util/bit_util.h @@ -337,113 +337,113 @@ public: return v >> num_bits; } - static void ByteSwapScalar(void *dest, const void *source, int len) { - uint8_t *dst = reinterpret_cast(dest); - const uint8_t *src = reinterpret_cast(source); + static void ByteSwapScalar(void* dest, const void* source, int len) { + uint8_t* dst = reinterpret_cast(dest); + const uint8_t* src = reinterpret_cast(source); switch (len) { - case 1: - *reinterpret_cast(dst) = *reinterpret_cast(src); - return; - case 2: - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src)); - return; - case 3: - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 2); - return; - case 4: - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src)); - return; - case 5: - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 4); - return; - case 6: - *reinterpret_cast(dst + 2) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src + 4)); - return; - case 7: - *reinterpret_cast(dst + 3) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src + 4)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 6); - return; - case 8: - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src)); - return; - case 9: - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 8); - return; - case 10: - *reinterpret_cast(dst + 2) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - return; - case 11: - *reinterpret_cast(dst + 3) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 10); - return; - case 12: - *reinterpret_cast(dst + 4) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - return; - case 13: - *reinterpret_cast(dst + 5) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 12); - return; - case 14: - *reinterpret_cast(dst + 6) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst + 2) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src + 12)); - return; - case 15: - *reinterpret_cast(dst + 7) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst + 3) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - *reinterpret_cast(dst + 1) = - BitUtil::byte_swap(*reinterpret_cast(src + 12)); - *reinterpret_cast(dst) = *reinterpret_cast(src + 14); - return; - case 16: - *reinterpret_cast(dst + 8) = - BitUtil::byte_swap(*reinterpret_cast(src)); - *reinterpret_cast(dst) = - BitUtil::byte_swap(*reinterpret_cast(src + 8)); - return; - default: - // Revert to slow loop-based swap. - ByteSwapScalarLoop(source, len, dest); - return; + case 1: + *reinterpret_cast(dst) = *reinterpret_cast(src); + return; + case 2: + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src)); + return; + case 3: + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 2); + return; + case 4: + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src)); + return; + case 5: + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 4); + return; + case 6: + *reinterpret_cast(dst + 2) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src + 4)); + return; + case 7: + *reinterpret_cast(dst + 3) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src + 4)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 6); + return; + case 8: + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src)); + return; + case 9: + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 8); + return; + case 10: + *reinterpret_cast(dst + 2) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + return; + case 11: + *reinterpret_cast(dst + 3) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 10); + return; + case 12: + *reinterpret_cast(dst + 4) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + return; + case 13: + *reinterpret_cast(dst + 5) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 12); + return; + case 14: + *reinterpret_cast(dst + 6) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst + 2) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src + 12)); + return; + case 15: + *reinterpret_cast(dst + 7) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst + 3) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + *reinterpret_cast(dst + 1) = + BitUtil::byte_swap(*reinterpret_cast(src + 12)); + *reinterpret_cast(dst) = *reinterpret_cast(src + 14); + return; + case 16: + *reinterpret_cast(dst + 8) = + BitUtil::byte_swap(*reinterpret_cast(src)); + *reinterpret_cast(dst) = + BitUtil::byte_swap(*reinterpret_cast(src + 8)); + return; + default: + // Revert to slow loop-based swap. + ByteSwapScalarLoop(source, len, dest); + return; } } - static void ByteSwapScalarLoop(const void *src, int len, void *dst) { + static void ByteSwapScalarLoop(const void* src, int len, void* dst) { //TODO: improve the performance of following code further using BSWAP intrinsic - uint8_t *d = reinterpret_cast(dst); - const uint8_t *s = reinterpret_cast(src); + uint8_t* d = reinterpret_cast(dst); + const uint8_t* s = reinterpret_cast(src); for (int i = 0; i < len; ++i) d[i] = s[len - i - 1]; } }; diff --git a/be/src/util/blocking_queue.hpp b/be/src/util/blocking_queue.hpp index 92411811be..cf5d53fb67 100644 --- a/be/src/util/blocking_queue.hpp +++ b/be/src/util/blocking_queue.hpp @@ -36,12 +36,11 @@ namespace doris { template class BlockingQueue { public: - BlockingQueue(size_t max_elements) : - _shutdown(false), - _max_elements(max_elements), - _total_get_wait_time(0), - _total_put_wait_time(0) { - } + BlockingQueue(size_t max_elements) + : _shutdown(false), + _max_elements(max_elements), + _total_get_wait_time(0), + _total_put_wait_time(0) {} // Get an element from the queue, waiting indefinitely for one to become available. // Returns false if we were shut down prior to getting the element, and there @@ -155,7 +154,6 @@ public: } private: - uint32_t SizeLocked(const std::unique_lock& lock) const { // The size of 'get_list_' is read racily to avoid getting 'get_lock_' in write path. DCHECK(lock.owns_lock()); @@ -164,8 +162,8 @@ private: bool _shutdown; const int _max_elements; - std::condition_variable _get_cv; // 'get' callers wait on this - std::condition_variable _put_cv; // 'put' callers wait on this + std::condition_variable _get_cv; // 'get' callers wait on this + std::condition_variable _put_cv; // 'put' callers wait on this // _lock guards access to _list, total_get_wait_time, and total_put_wait_time mutable std::mutex _lock; std::list _list; @@ -173,6 +171,6 @@ private: uint64_t _total_put_wait_time; }; -} +} // namespace doris #endif diff --git a/be/src/util/broker_storage_backend.cpp b/be/src/util/broker_storage_backend.cpp index 65ddbb30a8..c812e98f48 100644 --- a/be/src/util/broker_storage_backend.cpp +++ b/be/src/util/broker_storage_backend.cpp @@ -196,8 +196,8 @@ Status BrokerStorageBackend::rename_dir(const std::string& orig_name, const std: return rename(orig_name, new_name); } -Status BrokerStorageBackend::list(const std::string& remote_path, bool contain_md5, - bool recursion, std::map* files) { +Status BrokerStorageBackend::list(const std::string& remote_path, bool contain_md5, bool recursion, + std::map* files) { Status status = Status::OK(); BrokerServiceConnection client(client_cache(_env), _broker_addr, config::thrift_rpc_timeout_ms, &status); diff --git a/be/src/util/broker_storage_backend.h b/be/src/util/broker_storage_backend.h index 8c011eb285..baa78bfc0f 100644 --- a/be/src/util/broker_storage_backend.h +++ b/be/src/util/broker_storage_backend.h @@ -36,8 +36,8 @@ public: const std::string& checksum) override; Status rename(const std::string& orig_name, const std::string& new_name) override; Status rename_dir(const std::string& orig_name, const std::string& new_name) override; - Status list(const std::string& remote_path, bool contain_md5, - bool recursion, std::map* files) override; + Status list(const std::string& remote_path, bool contain_md5, bool recursion, + std::map* files) override; Status direct_upload(const std::string& remote, const std::string& content) override; Status rm(const std::string& remote) override; Status rmdir(const std::string& remote) override; diff --git a/be/src/util/brpc_client_cache.h b/be/src/util/brpc_client_cache.h index f268a69243..d7c2369e0b 100644 --- a/be/src/util/brpc_client_cache.h +++ b/be/src/util/brpc_client_cache.h @@ -66,7 +66,7 @@ public: std::shared_ptr get_client(const std::string& host_port) { std::shared_ptr stub_ptr; auto get_value = [&stub_ptr](typename StubMap::mapped_type& v) { stub_ptr = v; }; - if(LIKELY(_stub_map.if_contains(host_port, get_value))) { + if (LIKELY(_stub_map.if_contains(host_port, get_value))) { return stub_ptr; } @@ -88,9 +88,8 @@ public: } auto stub = std::make_shared(channel.release(), google::protobuf::Service::STUB_OWNS_CHANNEL); - _stub_map.try_emplace_l(host_port, - [&stub](typename StubMap::mapped_type& v) { stub = v; }, - stub); + _stub_map.try_emplace_l( + host_port, [&stub](typename StubMap::mapped_type& v) { stub = v; }, stub); return stub; } diff --git a/be/src/util/counter_cond_variable.hpp b/be/src/util/counter_cond_variable.hpp index da56b4e41b..8d38985168 100644 --- a/be/src/util/counter_cond_variable.hpp +++ b/be/src/util/counter_cond_variable.hpp @@ -43,13 +43,12 @@ namespace doris { // ... do work... // cond.dec(); // -// thread3(waiter): -// cond.block_wait(); +// thread3(waiter): +// cond.block_wait(); class CounterCondVariable { public: - explicit CounterCondVariable(int init = 0) : _count(init) { - } + explicit CounterCondVariable(int init = 0) : _count(init) {} // increase the counter void inc(int inc = 1) { @@ -81,4 +80,4 @@ private: int _count; }; -} // end namespace +} // namespace doris diff --git a/be/src/util/counts.h b/be/src/util/counts.h index dc1cd3d0c4..56ca3f0525 100644 --- a/be/src/util/counts.h +++ b/be/src/util/counts.h @@ -82,7 +82,8 @@ public: } } - double get_percentile(std::vector>& counts, double position) const { + double get_percentile(std::vector>& counts, + double position) const { long lower = std::floor(position); long higher = std::ceil(position); diff --git a/be/src/util/debug/sanitizer_scopes.h b/be/src/util/debug/sanitizer_scopes.h index 363d6d7fce..3e72bdf24a 100644 --- a/be/src/util/debug/sanitizer_scopes.h +++ b/be/src/util/debug/sanitizer_scopes.h @@ -30,17 +30,13 @@ namespace debug { // on the current thread as long as it is alive. These may be safely // nested. class ScopedTSANIgnoreReadsAndWrites { - public: - ScopedTSANIgnoreReadsAndWrites() { - ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN(); - } - ~ScopedTSANIgnoreReadsAndWrites() { - ANNOTATE_IGNORE_READS_AND_WRITES_END(); - } - private: - DISALLOW_COPY_AND_ASSIGN(ScopedTSANIgnoreReadsAndWrites); +public: + ScopedTSANIgnoreReadsAndWrites() { ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN(); } + ~ScopedTSANIgnoreReadsAndWrites() { ANNOTATE_IGNORE_READS_AND_WRITES_END(); } + +private: + DISALLOW_COPY_AND_ASSIGN(ScopedTSANIgnoreReadsAndWrites); }; } // namespace debug } // namespace doris - diff --git a/be/src/util/histogram.cpp b/be/src/util/histogram.cpp index b3397114b5..a081d0e012 100644 --- a/be/src/util/histogram.cpp +++ b/be/src/util/histogram.cpp @@ -30,7 +30,8 @@ HistogramBucketMapper::HistogramBucketMapper() { _bucket_values = {1, 2}; _value_index_map = {{1, 0}, {2, 1}}; double bucket_val = static_cast(_bucket_values.back()); - while ((bucket_val = 1.5 * bucket_val) <= static_cast(std::numeric_limits::max())) { + while ((bucket_val = 1.5 * bucket_val) <= + static_cast(std::numeric_limits::max())) { _bucket_values.push_back(static_cast(bucket_val)); // Extracts two most significant digits to make histogram buckets more // human-readable. E.g., 172 becomes 170. @@ -51,7 +52,7 @@ size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const { return _bucket_values.size() - 1; } else if (value >= _min_bucket_value) { std::map::const_iterator lowerBound = - _value_index_map.lower_bound(value); + _value_index_map.lower_bound(value); if (lowerBound != _value_index_map.end()) { return static_cast(lowerBound->second); } else { @@ -63,7 +64,7 @@ size_t HistogramBucketMapper::index_for_value(const uint64_t& value) const { } namespace { - const HistogramBucketMapper bucket_mapper; +const HistogramBucketMapper bucket_mapper; } HistogramStat::HistogramStat() : _num_buckets(bucket_mapper.bucket_count()) { @@ -82,7 +83,9 @@ void HistogramStat::clear() { } }; -bool HistogramStat::is_empty() const { return num() == 0; } +bool HistogramStat::is_empty() const { + return num() == 0; +} void HistogramStat::add(const uint64_t& value) { // This function is designed to be lock free, as it's in the critical path @@ -91,7 +94,7 @@ void HistogramStat::add(const uint64_t& value) { const size_t index = bucket_mapper.index_for_value(value); DCHECK(index < _num_buckets); _buckets[index].store(_buckets[index].load(std::memory_order_relaxed) + 1, - std::memory_order_relaxed); + std::memory_order_relaxed); uint64_t old_min = min(); if (value < old_min) { @@ -103,13 +106,10 @@ void HistogramStat::add(const uint64_t& value) { _max.store(value, std::memory_order_relaxed); } - _num.store(_num.load(std::memory_order_relaxed) + 1, - std::memory_order_relaxed); - _sum.store(_sum.load(std::memory_order_relaxed) + value, - std::memory_order_relaxed); - _sum_squares.store( - _sum_squares.load(std::memory_order_relaxed) + value * value, - std::memory_order_relaxed); + _num.store(_num.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + _sum.store(_sum.load(std::memory_order_relaxed) + value, std::memory_order_relaxed); + _sum_squares.store(_sum_squares.load(std::memory_order_relaxed) + value * value, + std::memory_order_relaxed); } void HistogramStat::merge(const HistogramStat& other) { @@ -118,13 +118,13 @@ void HistogramStat::merge(const HistogramStat& other) { // requires no lock and value update can still happen concurrently uint64_t old_min = min(); uint64_t other_min = other.min(); - while (other_min < old_min && - !_min.compare_exchange_weak(old_min, other_min)) {} + while (other_min < old_min && !_min.compare_exchange_weak(old_min, other_min)) { + } uint64_t old_max = max(); uint64_t other_max = other.max(); - while (other_max > old_max && - !_max.compare_exchange_weak(old_max, other_max)) {} + while (other_max > old_max && !_max.compare_exchange_weak(old_max, other_max)) { + } _num.fetch_add(other.num(), std::memory_order_relaxed); _sum.fetch_add(other.sum(), std::memory_order_relaxed); @@ -146,7 +146,7 @@ double HistogramStat::percentile(double p) const { cumulative_sum += bucket_value; if (cumulative_sum >= threshold) { // Scale linearly within this bucket - uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b-1); + uint64_t left_point = (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1); uint64_t right_point = bucket_mapper.bucket_limit(b); uint64_t left_sum = cumulative_sum - bucket_value; uint64_t right_sum = cumulative_sum; @@ -178,45 +178,40 @@ double HistogramStat::standard_deviation() const { uint64_t cur_sum = sum(); uint64_t cur_sum_squares = sum_squares(); if (cur_num == 0) return 0; - double variance = - static_cast(cur_sum_squares * cur_num - cur_sum * cur_sum) / - static_cast(cur_num * cur_num); + double variance = static_cast(cur_sum_squares * cur_num - cur_sum * cur_sum) / + static_cast(cur_num * cur_num); return std::sqrt(variance); } std::string HistogramStat::to_string() const { uint64_t cur_num = num(); std::string r; char buf[1650]; - snprintf(buf, sizeof(buf), - "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", - cur_num, average(), standard_deviation()); + snprintf(buf, sizeof(buf), "Count: %" PRIu64 " Average: %.4f StdDev: %.2f\n", cur_num, + average(), standard_deviation()); + r.append(buf); + snprintf(buf, sizeof(buf), "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n", + (cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max())); r.append(buf); snprintf(buf, sizeof(buf), - "Min: %" PRIu64 " Median: %.4f Max: %" PRIu64 "\n", - (cur_num == 0 ? 0 : min()), median(), (cur_num == 0 ? 0 : max())); - r.append(buf); - snprintf(buf, sizeof(buf), - "Percentiles: " - "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n", - percentile(50), percentile(75), percentile(99), percentile(99.9), - percentile(99.99)); + "Percentiles: " + "P50: %.2f P75: %.2f P99: %.2f P99.9: %.2f P99.99: %.2f\n", + percentile(50), percentile(75), percentile(99), percentile(99.9), percentile(99.99)); r.append(buf); r.append("------------------------------------------------------\n"); - if (cur_num == 0) return r; // all buckets are empty + if (cur_num == 0) return r; // all buckets are empty const double mult = 100.0 / cur_num; uint64_t cumulative_sum = 0; for (unsigned int b = 0; b < _num_buckets; b++) { uint64_t bucket_value = bucket_at(b); if (bucket_value <= 0.0) continue; cumulative_sum += bucket_value; - snprintf(buf, sizeof(buf), - "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", - (b == 0) ? '[' : '(', - (b == 0) ? 0 : bucket_mapper.bucket_limit(b-1), // left - bucket_mapper.bucket_limit(b), // right - bucket_value, // count - (mult * bucket_value), // percentage - (mult * cumulative_sum)); // cumulative percentage + snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", + (b == 0) ? '[' : '(', + (b == 0) ? 0 : bucket_mapper.bucket_limit(b - 1), // left + bucket_mapper.bucket_limit(b), // right + bucket_value, // count + (mult * bucket_value), // percentage + (mult * cumulative_sum)); // cumulative percentage r.append(buf); // Add hash marks based on percentage; 20 marks for 100%. @@ -227,4 +222,4 @@ std::string HistogramStat::to_string() const { return r; } -} // namespace doris +} // namespace doris diff --git a/be/src/util/histogram.h b/be/src/util/histogram.h index bd81b2bfc7..4eea65f81a 100644 --- a/be/src/util/histogram.h +++ b/be/src/util/histogram.h @@ -29,7 +29,7 @@ namespace doris { // Histogram data structure implementation: // // After construction, the 'value_index_map' will be set to: -// +// // BucketValue: | 1 | 2 | 2*1.5 |2*1.5^2|2*1.5^3| ... |2*1.5^n| ... |UINT64MAX| // Index: | 0 | 1 | 2 | 3 | 4 | ... | n-1 | ... | 108 | // @@ -40,22 +40,16 @@ namespace doris { class HistogramBucketMapper { public: HistogramBucketMapper(); - + // converts a value to the bucket index. size_t index_for_value(const uint64_t& value) const; // number of buckets required. - size_t bucket_count() const { - return _bucket_values.size(); - } + size_t bucket_count() const { return _bucket_values.size(); } - uint64_t last_value() const { - return _max_bucket_value; - } + uint64_t last_value() const { return _max_bucket_value; } - uint64_t first_value() const { - return _min_bucket_value; - } + uint64_t first_value() const { return _min_bucket_value; } uint64_t bucket_limit(const size_t bucket_number) const { DCHECK(bucket_number < bucket_count()); @@ -85,12 +79,8 @@ struct HistogramStat { uint64_t max() const { return _max.load(std::memory_order_relaxed); } uint64_t num() const { return _num.load(std::memory_order_relaxed); } uint64_t sum() const { return _sum.load(std::memory_order_relaxed); } - uint64_t sum_squares() const { - return _sum_squares.load(std::memory_order_relaxed); - } - uint64_t bucket_at(size_t b) const { - return _buckets[b].load(std::memory_order_relaxed); - } + uint64_t sum_squares() const { return _sum_squares.load(std::memory_order_relaxed); } + uint64_t bucket_at(size_t b) const { return _buckets[b].load(std::memory_order_relaxed); } double median() const; double percentile(double p) const; @@ -110,4 +100,4 @@ struct HistogramStat { const uint64_t _num_buckets; }; -} // namespace doris +} // namespace doris diff --git a/be/src/util/jni-util.cpp b/be/src/util/jni-util.cpp index cad68db617..54ca27a64d 100644 --- a/be/src/util/jni-util.cpp +++ b/be/src/util/jni-util.cpp @@ -35,7 +35,7 @@ void FindOrCreateJavaVM() { int num_vms; int rv = JNI_GetCreatedJavaVMs(&g_vm, 1, &num_vms); if (rv == 0) { - JNIEnv *env; + JNIEnv* env; JavaVMInitArgs vm_args; JavaVMOption options[1]; char* str = getenv("DORIS_JNI_CLASSPATH_PARAMETER"); @@ -45,7 +45,7 @@ void FindOrCreateJavaVM() { vm_args.nOptions = 1; vm_args.ignoreUnrecognized = JNI_TRUE; - int res = JNI_CreateJavaVM(&g_vm, (void **)&env, &vm_args); + int res = JNI_CreateJavaVM(&g_vm, (void**)&env, &vm_args); DCHECK_LT(res, 0) << "Failed tp create JVM, code= " << res; } else { CHECK_EQ(rv, 0) << "Could not find any created Java VM"; @@ -101,7 +101,7 @@ Status JniUtil::GetJNIEnvSlowPath(JNIEnv** env) { GoogleOnceInit(&g_vm_once, &FindOrCreateJavaVM); int rc = g_vm->GetEnv(reinterpret_cast(&tls_env_), JNI_VERSION_1_8); if (rc == JNI_EDETACHED) { - rc = g_vm->AttachCurrentThread((void **) &tls_env_, nullptr); + rc = g_vm->AttachCurrentThread((void**)&tls_env_, nullptr); } if (rc != 0 || tls_env_ == nullptr) { return Status::InternalError("Unable to get JVM!"); @@ -117,10 +117,11 @@ Status JniUtil::GetJniExceptionMsg(JNIEnv* env, bool log_stack, const string& pr } env->ExceptionClear(); DCHECK(throwable_to_string_id() != nullptr); - const char* oom_msg_template = "$0 threw an unchecked exception. The JVM is likely out " + const char* oom_msg_template = + "$0 threw an unchecked exception. The JVM is likely out " "of memory (OOM)."; - jstring msg = static_cast(env->CallStaticObjectMethod(jni_util_class(), - throwable_to_string_id(), exc)); + jstring msg = static_cast( + env->CallStaticObjectMethod(jni_util_class(), throwable_to_string_id(), exc)); if (env->ExceptionOccurred()) { env->ExceptionClear(); string oom_msg = strings::Substitute(oom_msg_template, "throwableToString"); @@ -130,8 +131,8 @@ Status JniUtil::GetJniExceptionMsg(JNIEnv* env, bool log_stack, const string& pr JniUtfCharGuard msg_str_guard; RETURN_IF_ERROR(JniUtfCharGuard::create(env, msg, &msg_str_guard)); if (log_stack) { - jstring stack = static_cast(env->CallStaticObjectMethod(jni_util_class(), - throwable_to_stack_trace_id(), exc)); + jstring stack = static_cast( + env->CallStaticObjectMethod(jni_util_class(), throwable_to_stack_trace_id(), exc)); if (env->ExceptionOccurred()) { env->ExceptionClear(); string oom_msg = strings::Substitute(oom_msg_template, "throwableToStackTrace"); @@ -185,8 +186,7 @@ Status JniUtil::Init() { } // Find InternalException class and create a global ref. - jclass local_internal_exc_cl = - env->FindClass("org/apache/doris/udf/InternalException"); + jclass local_internal_exc_cl = env->FindClass("org/apache/doris/udf/InternalException"); if (local_internal_exc_cl == NULL) { if (env->ExceptionOccurred()) env->ExceptionDescribe(); return Status::InternalError("Failed to find JniUtil class."); @@ -202,39 +202,34 @@ Status JniUtil::Init() { } // Throwable toString() - throwable_to_string_id_ = - env->GetStaticMethodID(jni_util_cl_, "throwableToString", - "(Ljava/lang/Throwable;)Ljava/lang/String;"); + throwable_to_string_id_ = env->GetStaticMethodID(jni_util_cl_, "throwableToString", + "(Ljava/lang/Throwable;)Ljava/lang/String;"); if (throwable_to_string_id_ == NULL) { if (env->ExceptionOccurred()) env->ExceptionDescribe(); return Status::InternalError("Failed to find JniUtil.throwableToString method."); } // throwableToStackTrace() - throwable_to_stack_trace_id_ = - env->GetStaticMethodID(jni_util_cl_, "throwableToStackTrace", - "(Ljava/lang/Throwable;)Ljava/lang/String;"); + throwable_to_stack_trace_id_ = env->GetStaticMethodID( + jni_util_cl_, "throwableToStackTrace", "(Ljava/lang/Throwable;)Ljava/lang/String;"); if (throwable_to_stack_trace_id_ == NULL) { if (env->ExceptionOccurred()) env->ExceptionDescribe(); return Status::InternalError("Failed to find JniUtil.throwableToFullStackTrace method."); } - get_jvm_metrics_id_ = - env->GetStaticMethodID(jni_util_cl_, "getJvmMemoryMetrics", "()[B"); + get_jvm_metrics_id_ = env->GetStaticMethodID(jni_util_cl_, "getJvmMemoryMetrics", "()[B"); if (get_jvm_metrics_id_ == NULL) { if (env->ExceptionOccurred()) env->ExceptionDescribe(); return Status::InternalError("Failed to find JniUtil.getJvmMemoryMetrics method."); } - get_jvm_threads_id_ = - env->GetStaticMethodID(jni_util_cl_, "getJvmThreadsInfo", "([B)[B"); + get_jvm_threads_id_ = env->GetStaticMethodID(jni_util_cl_, "getJvmThreadsInfo", "([B)[B"); if (get_jvm_threads_id_ == NULL) { if (env->ExceptionOccurred()) env->ExceptionDescribe(); return Status::InternalError("Failed to find JniUtil.getJvmThreadsInfo method."); } - get_jmx_json_ = - env->GetStaticMethodID(jni_util_cl_, "getJMXJson", "()[B"); + get_jmx_json_ = env->GetStaticMethodID(jni_util_cl_, "getJMXJson", "()[B"); if (get_jmx_json_ == NULL) { if (env->ExceptionOccurred()) env->ExceptionDescribe(); return Status::InternalError("Failed to find JniUtil.getJMXJson method."); diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h index d83e1fdbb5..9a8c1cb859 100644 --- a/be/src/util/jni-util.h +++ b/be/src/util/jni-util.h @@ -28,11 +28,11 @@ namespace doris { -#define RETURN_ERROR_IF_EXC(env) \ - do { \ - jthrowable exc = (env)->ExceptionOccurred(); \ - if (exc != nullptr) return JniUtil::GetJniExceptionMsg(env);\ - } while (false) +#define RETURN_ERROR_IF_EXC(env) \ + do { \ + jthrowable exc = (env)->ExceptionOccurred(); \ + if (exc != nullptr) return JniUtil::GetJniExceptionMsg(env); \ + } while (false) class JniUtil { public: @@ -48,8 +48,8 @@ public: return GetJNIEnvSlowPath(env); } - static Status GetGlobalClassRef( - JNIEnv* env, const char* class_str, jclass* class_ref) WARN_UNUSED_RESULT; + static Status GetGlobalClassRef(JNIEnv* env, const char* class_str, + jclass* class_ref) WARN_UNUSED_RESULT; static Status LocalToGlobalRef(JNIEnv* env, jobject local_ref, jobject* global_ref) WARN_UNUSED_RESULT; @@ -95,6 +95,7 @@ public: /// Get the char sequence. Returns nullptr if the guard does hold a char sequence. const char* get() { return utf_chars; } + private: JNIEnv* env; jstring jstr; @@ -104,14 +105,13 @@ private: class JniLocalFrame { public: - JniLocalFrame(): env_(nullptr) {} - ~JniLocalFrame() { if (env_ != nullptr) env_->PopLocalFrame(nullptr); } - - JniLocalFrame(JniLocalFrame&& other) noexcept - : env_(other.env_) { - other.env_ = nullptr; + JniLocalFrame() : env_(nullptr) {} + ~JniLocalFrame() { + if (env_ != nullptr) env_->PopLocalFrame(nullptr); } + JniLocalFrame(JniLocalFrame&& other) noexcept : env_(other.env_) { other.env_ = nullptr; } + /// Pushes a new JNI local frame. The frame can support max_local_ref local references. /// The number of local references created inside the frame might exceed max_local_ref, /// but there is no guarantee that memory will be available. @@ -126,7 +126,7 @@ private: template Status SerializeThriftMsg(JNIEnv* env, T* msg, jbyteArray* serialized_msg) { - int buffer_size = 100 * 1024; // start out with 100KB + int buffer_size = 100 * 1024; // start out with 100KB ThriftSerializer serializer(false, buffer_size); uint8_t* buffer = NULL; diff --git a/be/src/util/logging.h b/be/src/util/logging.h index 506ddd1c78..ddf388d31c 100644 --- a/be/src/util/logging.h +++ b/be/src/util/logging.h @@ -111,9 +111,7 @@ class TaggableLogger { public: TaggableLogger(std::ostream& _stream) : _stream(_stream), _tags(nullptr) {}; - ~TaggableLogger() { - flush(); - } + ~TaggableLogger() { flush(); } void flush(); @@ -141,8 +139,10 @@ private: const std::string value; Tags* next; - Tags(const std::string& key, const std::string& value, Tags* next) : key(key), value(value), next(next) {} - Tags(const std::string& key, std::string&& value, Tags* next) : key(key), value(std::move(value)), next(next) {} + Tags(const std::string& key, const std::string& value, Tags* next) + : key(key), value(value), next(next) {} + Tags(const std::string& key, std::string&& value, Tags* next) + : key(key), value(std::move(value)), next(next) {} }; Tags* _tags; @@ -151,9 +151,7 @@ public: // add tag method here const static std::string QUERY_ID; - TaggableLogger& query_id(const std::string& query_id) { - return tag(QUERY_ID, query_id); - } + TaggableLogger& query_id(const std::string& query_id) { return tag(QUERY_ID, query_id); } TaggableLogger& query_id(const TUniqueId& query_id) { return tag(QUERY_ID, print_id(query_id)); diff --git a/be/src/util/lru_cache.hpp b/be/src/util/lru_cache.hpp index 023ee49c6e..638420b2a8 100644 --- a/be/src/util/lru_cache.hpp +++ b/be/src/util/lru_cache.hpp @@ -32,34 +32,26 @@ public: class Iterator : public std::iterator { public: - Iterator(typename std::unordered_map::iterator it) : _it(it) { } + Iterator(typename std::unordered_map::iterator it) : _it(it) {} Iterator& operator++() { ++_it; return *this; } - bool operator==(const Iterator& rhs) const { - return _it == rhs._it; - } + bool operator==(const Iterator& rhs) const { return _it == rhs._it; } - bool operator!=(const Iterator& rhs) const { - return _it != rhs._it; - } + bool operator!=(const Iterator& rhs) const { return _it != rhs._it; } - KeyValuePair* operator->() { - return _it->second.operator->(); - } + KeyValuePair* operator->() { return _it->second.operator->(); } - KeyValuePair& operator*() { - return *_it->second; - } + KeyValuePair& operator*() { return *_it->second; } private: typename std::unordered_map::iterator _it; }; - LruCache(size_t max_size) : _max_size(max_size) { } + LruCache(size_t max_size) : _max_size(max_size) {} void put(const Key& key, const Value& value) { auto it = _cache_items_map.find(key); @@ -102,17 +94,11 @@ public: return _cache_items_map.find(key) != _cache_items_map.end(); } - size_t size() const { - return _cache_items_map.size(); - } + size_t size() const { return _cache_items_map.size(); } - Iterator begin() { - return Iterator(_cache_items_map.begin()); - } + Iterator begin() { return Iterator(_cache_items_map.begin()); } - Iterator end() { - return Iterator(_cache_items_map.end()); - } + Iterator end() { return Iterator(_cache_items_map.end()); } private: std::list _cache_items_list; @@ -120,6 +106,6 @@ private: size_t _max_size; }; -} +} // namespace doris #endif diff --git a/be/src/util/mem_util.hpp b/be/src/util/mem_util.hpp index 473c8a37be..937ebc8352 100644 --- a/be/src/util/mem_util.hpp +++ b/be/src/util/mem_util.hpp @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_COMMON_UTIL_MEM_UTIL_HPP -#define DORIS_BE_SRC_COMMON_UTIL_MEM_UTIL_HPP +#ifndef DORIS_BE_SRC_COMMON_UTIL_MEM_UTIL_HPP +#define DORIS_BE_SRC_COMMON_UTIL_MEM_UTIL_HPP #include namespace doris { -template +template inline void fixed_size_memory_copy(void* dst, const void* src) { struct X { uint8_t byte[N]; @@ -31,590 +31,573 @@ inline void fixed_size_memory_copy(void* dst, const void* src) { *(reinterpret_cast(dst)) = *(reinterpret_cast(src)); } -template<> inline void fixed_size_memory_copy<0>(void*, const void*) {} +template <> +inline void fixed_size_memory_copy<0>(void*, const void*) {} -template<> inline void fixed_size_memory_copy<1>(void* dst, const void* src) { - *(reinterpret_cast(dst)) = * (reinterpret_cast(src)); +template <> +inline void fixed_size_memory_copy<1>(void* dst, const void* src) { + *(reinterpret_cast(dst)) = *(reinterpret_cast(src)); } -template<> inline void fixed_size_memory_copy<2>(void* dst, const void* src) { - *(reinterpret_cast(dst)) = * (reinterpret_cast(src)); +template <> +inline void fixed_size_memory_copy<2>(void* dst, const void* src) { + *(reinterpret_cast(dst)) = *(reinterpret_cast(src)); } -template<> inline void fixed_size_memory_copy<4>(void* dst, const void* src) { - *(reinterpret_cast(dst)) = * (reinterpret_cast(src)); +template <> +inline void fixed_size_memory_copy<4>(void* dst, const void* src) { + *(reinterpret_cast(dst)) = *(reinterpret_cast(src)); } -template<> inline void fixed_size_memory_copy<8>(void* dst, const void* src) { - *(reinterpret_cast(dst)) = * (reinterpret_cast(src)); +template <> +inline void fixed_size_memory_copy<8>(void* dst, const void* src) { + *(reinterpret_cast(dst)) = *(reinterpret_cast(src)); } inline void memory_copy(void* dst, const void* src, size_t size) { // Function fixed_size_memory_copy will report a stack-use-after-scope error in ASAN mode. #if !defined(ADDRESS_SANITIZER) static const void* addrs[] = { - &&B0, &&B1, &&B2, &&B3, &&B4, &&B5, &&B6, - &&B7, &&B8, &&B9, &&B10, &&B11, &&B12, &&B13, - &&B14, &&B15, &&B16, &&B17, &&B18, &&B19, - &&B20, &&B21, &&B22, &&B23, &&B24, &&B25, - &&B26, &&B27, &&B28, &&B29, &&B30, &&B31, - &&B32, &&B33, &&B34, &&B35, &&B36, &&B37, - &&B38, &&B39, &&B40, &&B41, &&B42, &&B43, - &&B44, &&B45, &&B46, &&B47, &&B48, &&B49, - &&B50, &&B51, &&B52, &&B53, &&B54, &&B55, - &&B56, &&B57, &&B58, &&B59, &&B60, &&B61, - &&B62, &&B63, &&B64, &&B65, &&B66, &&B67, - &&B68, &&B69, &&B70, &&B71, &&B72, &&B73, - &&B74, &&B75, &&B76, &&B77, &&B78, &&B79, - &&B80, &&B81, &&B82, &&B83, &&B84, &&B85, - &&B86, &&B87, &&B88, &&B89, &&B90, &&B91, - &&B92, &&B93, &&B94, &&B95, &&B96, &&B97, - &&B98, &&B99, &&B100, &&B101, &&B102, &&B103, - &&B104, &&B105, &&B106, &&B107, &&B108, - &&B109, &&B110, &&B111, &&B112, &&B113, - &&B114, &&B115, &&B116, &&B117, &&B118, - &&B119, &&B120, &&B121, &&B122, &&B123, - &&B124, &&B125, &&B126, &&B127, &&B128, - &&B129, &&B130, &&B131, &&B132, &&B133, - &&B134, &&B135, &&B136, &&B137, &&B138, - &&B139, &&B140, &&B141, &&B142, &&B143, - &&B144, &&B145, &&B146, &&B147, &&B148, - &&B149, &&B150, &&B151, &&B152, &&B153, - &&B154, &&B155, &&B156, &&B157, &&B158, - &&B159, &&B160, &&B161, &&B162, &&B163, - &&B164, &&B165, &&B166, &&B167, &&B168, - &&B169, &&B170, &&B171, &&B172, &&B173, &&B174, - &&B175, &&B176, &&B177, &&B178, &&B179, &&B180, - &&B181, &&B182, &&B183, &&B184, &&B185, - &&B186, &&B187, &&B188, &&B189, &&B190, &&B191, - &&B192, &&B193, &&B194, &&B195, &&B196, - &&B197, &&B198, &&B199, &&B200, &&B201, &&B202, - &&B203, &&B204, &&B205, &&B206, &&B207, - &&B208, &&B209, &&B210, &&B211, &&B212, &&B213, - &&B214, &&B215, &&B216, &&B217, &&B218, - &&B219, &&B220, &&B221, &&B222, &&B223, &&B224, - &&B225, &&B226, &&B227, &&B228, &&B229, - &&B230, &&B231, &&B232, &&B233, &&B234, &&B235, - &&B236, &&B237, &&B238, &&B239, &&B240, - &&B241, &&B242, &&B243, &&B244, &&B245, &&B246, - &&B247, &&B248, &&B249, &&B250, &&B251, - &&B252, &&B253, &&B254, &&B255, + &&B0, &&B1, &&B2, &&B3, &&B4, &&B5, &&B6, &&B7, &&B8, &&B9, &&B10, + &&B11, &&B12, &&B13, &&B14, &&B15, &&B16, &&B17, &&B18, &&B19, &&B20, &&B21, + &&B22, &&B23, &&B24, &&B25, &&B26, &&B27, &&B28, &&B29, &&B30, &&B31, &&B32, + &&B33, &&B34, &&B35, &&B36, &&B37, &&B38, &&B39, &&B40, &&B41, &&B42, &&B43, + &&B44, &&B45, &&B46, &&B47, &&B48, &&B49, &&B50, &&B51, &&B52, &&B53, &&B54, + &&B55, &&B56, &&B57, &&B58, &&B59, &&B60, &&B61, &&B62, &&B63, &&B64, &&B65, + &&B66, &&B67, &&B68, &&B69, &&B70, &&B71, &&B72, &&B73, &&B74, &&B75, &&B76, + &&B77, &&B78, &&B79, &&B80, &&B81, &&B82, &&B83, &&B84, &&B85, &&B86, &&B87, + &&B88, &&B89, &&B90, &&B91, &&B92, &&B93, &&B94, &&B95, &&B96, &&B97, &&B98, + &&B99, &&B100, &&B101, &&B102, &&B103, &&B104, &&B105, &&B106, &&B107, &&B108, &&B109, + &&B110, &&B111, &&B112, &&B113, &&B114, &&B115, &&B116, &&B117, &&B118, &&B119, &&B120, + &&B121, &&B122, &&B123, &&B124, &&B125, &&B126, &&B127, &&B128, &&B129, &&B130, &&B131, + &&B132, &&B133, &&B134, &&B135, &&B136, &&B137, &&B138, &&B139, &&B140, &&B141, &&B142, + &&B143, &&B144, &&B145, &&B146, &&B147, &&B148, &&B149, &&B150, &&B151, &&B152, &&B153, + &&B154, &&B155, &&B156, &&B157, &&B158, &&B159, &&B160, &&B161, &&B162, &&B163, &&B164, + &&B165, &&B166, &&B167, &&B168, &&B169, &&B170, &&B171, &&B172, &&B173, &&B174, &&B175, + &&B176, &&B177, &&B178, &&B179, &&B180, &&B181, &&B182, &&B183, &&B184, &&B185, &&B186, + &&B187, &&B188, &&B189, &&B190, &&B191, &&B192, &&B193, &&B194, &&B195, &&B196, &&B197, + &&B198, &&B199, &&B200, &&B201, &&B202, &&B203, &&B204, &&B205, &&B206, &&B207, &&B208, + &&B209, &&B210, &&B211, &&B212, &&B213, &&B214, &&B215, &&B216, &&B217, &&B218, &&B219, + &&B220, &&B221, &&B222, &&B223, &&B224, &&B225, &&B226, &&B227, &&B228, &&B229, &&B230, + &&B231, &&B232, &&B233, &&B234, &&B235, &&B236, &&B237, &&B238, &&B239, &&B240, &&B241, + &&B242, &&B243, &&B244, &&B245, &&B246, &&B247, &&B248, &&B249, &&B250, &&B251, &&B252, + &&B253, &&B254, &&B255, }; if (size <= 255) { // 这里使用GOTO是为了提高性能,switch、if else均无法达到此种性能 goto* addrs[size]; -B0: + B0: return fixed_size_memory_copy<0>(dst, src); -B1: + B1: return fixed_size_memory_copy<1>(dst, src); -B2: + B2: return fixed_size_memory_copy<2>(dst, src); -B3: + B3: return fixed_size_memory_copy<3>(dst, src); -B4: + B4: return fixed_size_memory_copy<4>(dst, src); -B5: + B5: return fixed_size_memory_copy<5>(dst, src); -B6: + B6: return fixed_size_memory_copy<6>(dst, src); -B7: + B7: return fixed_size_memory_copy<7>(dst, src); -B8: + B8: return fixed_size_memory_copy<8>(dst, src); -B9: + B9: return fixed_size_memory_copy<9>(dst, src); -B10: + B10: return fixed_size_memory_copy<10>(dst, src); -B11: + B11: return fixed_size_memory_copy<11>(dst, src); -B12: + B12: return fixed_size_memory_copy<12>(dst, src); -B13: + B13: return fixed_size_memory_copy<13>(dst, src); -B14: + B14: return fixed_size_memory_copy<14>(dst, src); -B15: + B15: return fixed_size_memory_copy<15>(dst, src); -B16: + B16: return fixed_size_memory_copy<16>(dst, src); -B17: + B17: return fixed_size_memory_copy<17>(dst, src); -B18: + B18: return fixed_size_memory_copy<18>(dst, src); -B19: + B19: return fixed_size_memory_copy<19>(dst, src); -B20: + B20: return fixed_size_memory_copy<20>(dst, src); -B21: + B21: return fixed_size_memory_copy<21>(dst, src); -B22: + B22: return fixed_size_memory_copy<22>(dst, src); -B23: + B23: return fixed_size_memory_copy<23>(dst, src); -B24: + B24: return fixed_size_memory_copy<24>(dst, src); -B25: + B25: return fixed_size_memory_copy<25>(dst, src); -B26: + B26: return fixed_size_memory_copy<26>(dst, src); -B27: + B27: return fixed_size_memory_copy<27>(dst, src); -B28: + B28: return fixed_size_memory_copy<28>(dst, src); -B29: + B29: return fixed_size_memory_copy<29>(dst, src); -B30: + B30: return fixed_size_memory_copy<30>(dst, src); -B31: + B31: return fixed_size_memory_copy<31>(dst, src); -B32: + B32: return fixed_size_memory_copy<32>(dst, src); -B33: + B33: return fixed_size_memory_copy<33>(dst, src); -B34: + B34: return fixed_size_memory_copy<34>(dst, src); -B35: + B35: return fixed_size_memory_copy<35>(dst, src); -B36: + B36: return fixed_size_memory_copy<36>(dst, src); -B37: + B37: return fixed_size_memory_copy<37>(dst, src); -B38: + B38: return fixed_size_memory_copy<38>(dst, src); -B39: + B39: return fixed_size_memory_copy<39>(dst, src); -B40: + B40: return fixed_size_memory_copy<40>(dst, src); -B41: + B41: return fixed_size_memory_copy<41>(dst, src); -B42: + B42: return fixed_size_memory_copy<42>(dst, src); -B43: + B43: return fixed_size_memory_copy<43>(dst, src); -B44: + B44: return fixed_size_memory_copy<44>(dst, src); -B45: + B45: return fixed_size_memory_copy<45>(dst, src); -B46: + B46: return fixed_size_memory_copy<46>(dst, src); -B47: + B47: return fixed_size_memory_copy<47>(dst, src); -B48: + B48: return fixed_size_memory_copy<48>(dst, src); -B49: + B49: return fixed_size_memory_copy<49>(dst, src); -B50: + B50: return fixed_size_memory_copy<50>(dst, src); -B51: + B51: return fixed_size_memory_copy<51>(dst, src); -B52: + B52: return fixed_size_memory_copy<52>(dst, src); -B53: + B53: return fixed_size_memory_copy<53>(dst, src); -B54: + B54: return fixed_size_memory_copy<54>(dst, src); -B55: + B55: return fixed_size_memory_copy<55>(dst, src); -B56: + B56: return fixed_size_memory_copy<56>(dst, src); -B57: + B57: return fixed_size_memory_copy<57>(dst, src); -B58: + B58: return fixed_size_memory_copy<58>(dst, src); -B59: + B59: return fixed_size_memory_copy<59>(dst, src); -B60: + B60: return fixed_size_memory_copy<60>(dst, src); -B61: + B61: return fixed_size_memory_copy<61>(dst, src); -B62: + B62: return fixed_size_memory_copy<62>(dst, src); -B63: + B63: return fixed_size_memory_copy<63>(dst, src); -B64: + B64: return fixed_size_memory_copy<64>(dst, src); -B65: + B65: return fixed_size_memory_copy<65>(dst, src); -B66: + B66: return fixed_size_memory_copy<66>(dst, src); -B67: + B67: return fixed_size_memory_copy<67>(dst, src); -B68: + B68: return fixed_size_memory_copy<68>(dst, src); -B69: + B69: return fixed_size_memory_copy<69>(dst, src); -B70: + B70: return fixed_size_memory_copy<70>(dst, src); -B71: + B71: return fixed_size_memory_copy<71>(dst, src); -B72: + B72: return fixed_size_memory_copy<72>(dst, src); -B73: + B73: return fixed_size_memory_copy<73>(dst, src); -B74: + B74: return fixed_size_memory_copy<74>(dst, src); -B75: + B75: return fixed_size_memory_copy<75>(dst, src); -B76: + B76: return fixed_size_memory_copy<76>(dst, src); -B77: + B77: return fixed_size_memory_copy<77>(dst, src); -B78: + B78: return fixed_size_memory_copy<78>(dst, src); -B79: + B79: return fixed_size_memory_copy<79>(dst, src); -B80: + B80: return fixed_size_memory_copy<80>(dst, src); -B81: + B81: return fixed_size_memory_copy<81>(dst, src); -B82: + B82: return fixed_size_memory_copy<82>(dst, src); -B83: + B83: return fixed_size_memory_copy<83>(dst, src); -B84: + B84: return fixed_size_memory_copy<84>(dst, src); -B85: + B85: return fixed_size_memory_copy<85>(dst, src); -B86: + B86: return fixed_size_memory_copy<86>(dst, src); -B87: + B87: return fixed_size_memory_copy<87>(dst, src); -B88: + B88: return fixed_size_memory_copy<88>(dst, src); -B89: + B89: return fixed_size_memory_copy<89>(dst, src); -B90: + B90: return fixed_size_memory_copy<90>(dst, src); -B91: + B91: return fixed_size_memory_copy<91>(dst, src); -B92: + B92: return fixed_size_memory_copy<92>(dst, src); -B93: + B93: return fixed_size_memory_copy<93>(dst, src); -B94: + B94: return fixed_size_memory_copy<94>(dst, src); -B95: + B95: return fixed_size_memory_copy<95>(dst, src); -B96: + B96: return fixed_size_memory_copy<96>(dst, src); -B97: + B97: return fixed_size_memory_copy<97>(dst, src); -B98: + B98: return fixed_size_memory_copy<98>(dst, src); -B99: + B99: return fixed_size_memory_copy<99>(dst, src); -B100: + B100: return fixed_size_memory_copy<100>(dst, src); -B101: + B101: return fixed_size_memory_copy<101>(dst, src); -B102: + B102: return fixed_size_memory_copy<102>(dst, src); -B103: + B103: return fixed_size_memory_copy<103>(dst, src); -B104: + B104: return fixed_size_memory_copy<104>(dst, src); -B105: + B105: return fixed_size_memory_copy<105>(dst, src); -B106: + B106: return fixed_size_memory_copy<106>(dst, src); -B107: + B107: return fixed_size_memory_copy<107>(dst, src); -B108: + B108: return fixed_size_memory_copy<108>(dst, src); -B109: + B109: return fixed_size_memory_copy<109>(dst, src); -B110: + B110: return fixed_size_memory_copy<110>(dst, src); -B111: + B111: return fixed_size_memory_copy<111>(dst, src); -B112: + B112: return fixed_size_memory_copy<112>(dst, src); -B113: + B113: return fixed_size_memory_copy<113>(dst, src); -B114: + B114: return fixed_size_memory_copy<114>(dst, src); -B115: + B115: return fixed_size_memory_copy<115>(dst, src); -B116: + B116: return fixed_size_memory_copy<116>(dst, src); -B117: + B117: return fixed_size_memory_copy<117>(dst, src); -B118: + B118: return fixed_size_memory_copy<118>(dst, src); -B119: + B119: return fixed_size_memory_copy<119>(dst, src); -B120: + B120: return fixed_size_memory_copy<120>(dst, src); -B121: + B121: return fixed_size_memory_copy<121>(dst, src); -B122: + B122: return fixed_size_memory_copy<122>(dst, src); -B123: + B123: return fixed_size_memory_copy<123>(dst, src); -B124: + B124: return fixed_size_memory_copy<124>(dst, src); -B125: + B125: return fixed_size_memory_copy<125>(dst, src); -B126: + B126: return fixed_size_memory_copy<126>(dst, src); -B127: + B127: return fixed_size_memory_copy<127>(dst, src); -B128: + B128: return fixed_size_memory_copy<128>(dst, src); -B129: + B129: return fixed_size_memory_copy<129>(dst, src); -B130: + B130: return fixed_size_memory_copy<130>(dst, src); -B131: + B131: return fixed_size_memory_copy<131>(dst, src); -B132: + B132: return fixed_size_memory_copy<132>(dst, src); -B133: + B133: return fixed_size_memory_copy<133>(dst, src); -B134: + B134: return fixed_size_memory_copy<134>(dst, src); -B135: + B135: return fixed_size_memory_copy<135>(dst, src); -B136: + B136: return fixed_size_memory_copy<136>(dst, src); -B137: + B137: return fixed_size_memory_copy<137>(dst, src); -B138: + B138: return fixed_size_memory_copy<138>(dst, src); -B139: + B139: return fixed_size_memory_copy<139>(dst, src); -B140: + B140: return fixed_size_memory_copy<140>(dst, src); -B141: + B141: return fixed_size_memory_copy<141>(dst, src); -B142: + B142: return fixed_size_memory_copy<142>(dst, src); -B143: + B143: return fixed_size_memory_copy<143>(dst, src); -B144: + B144: return fixed_size_memory_copy<144>(dst, src); -B145: + B145: return fixed_size_memory_copy<145>(dst, src); -B146: + B146: return fixed_size_memory_copy<146>(dst, src); -B147: + B147: return fixed_size_memory_copy<147>(dst, src); -B148: + B148: return fixed_size_memory_copy<148>(dst, src); -B149: + B149: return fixed_size_memory_copy<149>(dst, src); -B150: + B150: return fixed_size_memory_copy<150>(dst, src); -B151: + B151: return fixed_size_memory_copy<151>(dst, src); -B152: + B152: return fixed_size_memory_copy<152>(dst, src); -B153: + B153: return fixed_size_memory_copy<153>(dst, src); -B154: + B154: return fixed_size_memory_copy<154>(dst, src); -B155: + B155: return fixed_size_memory_copy<155>(dst, src); -B156: + B156: return fixed_size_memory_copy<156>(dst, src); -B157: + B157: return fixed_size_memory_copy<157>(dst, src); -B158: + B158: return fixed_size_memory_copy<158>(dst, src); -B159: + B159: return fixed_size_memory_copy<159>(dst, src); -B160: + B160: return fixed_size_memory_copy<160>(dst, src); -B161: + B161: return fixed_size_memory_copy<161>(dst, src); -B162: + B162: return fixed_size_memory_copy<162>(dst, src); -B163: + B163: return fixed_size_memory_copy<163>(dst, src); -B164: + B164: return fixed_size_memory_copy<164>(dst, src); -B165: + B165: return fixed_size_memory_copy<165>(dst, src); -B166: + B166: return fixed_size_memory_copy<166>(dst, src); -B167: + B167: return fixed_size_memory_copy<167>(dst, src); -B168: + B168: return fixed_size_memory_copy<168>(dst, src); -B169: + B169: return fixed_size_memory_copy<169>(dst, src); -B170: + B170: return fixed_size_memory_copy<170>(dst, src); -B171: + B171: return fixed_size_memory_copy<171>(dst, src); -B172: + B172: return fixed_size_memory_copy<172>(dst, src); -B173: + B173: return fixed_size_memory_copy<173>(dst, src); -B174: + B174: return fixed_size_memory_copy<174>(dst, src); -B175: + B175: return fixed_size_memory_copy<175>(dst, src); -B176: + B176: return fixed_size_memory_copy<176>(dst, src); -B177: + B177: return fixed_size_memory_copy<177>(dst, src); -B178: + B178: return fixed_size_memory_copy<178>(dst, src); -B179: + B179: return fixed_size_memory_copy<179>(dst, src); -B180: + B180: return fixed_size_memory_copy<180>(dst, src); -B181: + B181: return fixed_size_memory_copy<181>(dst, src); -B182: + B182: return fixed_size_memory_copy<182>(dst, src); -B183: + B183: return fixed_size_memory_copy<183>(dst, src); -B184: + B184: return fixed_size_memory_copy<184>(dst, src); -B185: + B185: return fixed_size_memory_copy<185>(dst, src); -B186: + B186: return fixed_size_memory_copy<186>(dst, src); -B187: + B187: return fixed_size_memory_copy<187>(dst, src); -B188: + B188: return fixed_size_memory_copy<188>(dst, src); -B189: + B189: return fixed_size_memory_copy<189>(dst, src); -B190: + B190: return fixed_size_memory_copy<190>(dst, src); -B191: + B191: return fixed_size_memory_copy<191>(dst, src); -B192: + B192: return fixed_size_memory_copy<192>(dst, src); -B193: + B193: return fixed_size_memory_copy<193>(dst, src); -B194: + B194: return fixed_size_memory_copy<194>(dst, src); -B195: + B195: return fixed_size_memory_copy<195>(dst, src); -B196: + B196: return fixed_size_memory_copy<196>(dst, src); -B197: + B197: return fixed_size_memory_copy<197>(dst, src); -B198: + B198: return fixed_size_memory_copy<198>(dst, src); -B199: + B199: return fixed_size_memory_copy<199>(dst, src); -B200: + B200: return fixed_size_memory_copy<200>(dst, src); -B201: + B201: return fixed_size_memory_copy<201>(dst, src); -B202: + B202: return fixed_size_memory_copy<202>(dst, src); -B203: + B203: return fixed_size_memory_copy<203>(dst, src); -B204: + B204: return fixed_size_memory_copy<204>(dst, src); -B205: + B205: return fixed_size_memory_copy<205>(dst, src); -B206: + B206: return fixed_size_memory_copy<206>(dst, src); -B207: + B207: return fixed_size_memory_copy<207>(dst, src); -B208: + B208: return fixed_size_memory_copy<208>(dst, src); -B209: + B209: return fixed_size_memory_copy<209>(dst, src); -B210: + B210: return fixed_size_memory_copy<210>(dst, src); -B211: + B211: return fixed_size_memory_copy<211>(dst, src); -B212: + B212: return fixed_size_memory_copy<212>(dst, src); -B213: + B213: return fixed_size_memory_copy<213>(dst, src); -B214: + B214: return fixed_size_memory_copy<214>(dst, src); -B215: + B215: return fixed_size_memory_copy<215>(dst, src); -B216: + B216: return fixed_size_memory_copy<216>(dst, src); -B217: + B217: return fixed_size_memory_copy<217>(dst, src); -B218: + B218: return fixed_size_memory_copy<218>(dst, src); -B219: + B219: return fixed_size_memory_copy<219>(dst, src); -B220: + B220: return fixed_size_memory_copy<220>(dst, src); -B221: + B221: return fixed_size_memory_copy<221>(dst, src); -B222: + B222: return fixed_size_memory_copy<222>(dst, src); -B223: + B223: return fixed_size_memory_copy<223>(dst, src); -B224: + B224: return fixed_size_memory_copy<224>(dst, src); -B225: + B225: return fixed_size_memory_copy<225>(dst, src); -B226: + B226: return fixed_size_memory_copy<226>(dst, src); -B227: + B227: return fixed_size_memory_copy<227>(dst, src); -B228: + B228: return fixed_size_memory_copy<228>(dst, src); -B229: + B229: return fixed_size_memory_copy<229>(dst, src); -B230: + B230: return fixed_size_memory_copy<230>(dst, src); -B231: + B231: return fixed_size_memory_copy<231>(dst, src); -B232: + B232: return fixed_size_memory_copy<232>(dst, src); -B233: + B233: return fixed_size_memory_copy<233>(dst, src); -B234: + B234: return fixed_size_memory_copy<234>(dst, src); -B235: + B235: return fixed_size_memory_copy<235>(dst, src); -B236: + B236: return fixed_size_memory_copy<236>(dst, src); -B237: + B237: return fixed_size_memory_copy<237>(dst, src); -B238: + B238: return fixed_size_memory_copy<238>(dst, src); -B239: + B239: return fixed_size_memory_copy<239>(dst, src); -B240: + B240: return fixed_size_memory_copy<240>(dst, src); -B241: + B241: return fixed_size_memory_copy<241>(dst, src); -B242: + B242: return fixed_size_memory_copy<242>(dst, src); -B243: + B243: return fixed_size_memory_copy<243>(dst, src); -B244: + B244: return fixed_size_memory_copy<244>(dst, src); -B245: + B245: return fixed_size_memory_copy<245>(dst, src); -B246: + B246: return fixed_size_memory_copy<246>(dst, src); -B247: + B247: return fixed_size_memory_copy<247>(dst, src); -B248: + B248: return fixed_size_memory_copy<248>(dst, src); -B249: + B249: return fixed_size_memory_copy<249>(dst, src); -B250: + B250: return fixed_size_memory_copy<250>(dst, src); -B251: + B251: return fixed_size_memory_copy<251>(dst, src); -B252: + B252: return fixed_size_memory_copy<252>(dst, src); -B253: + B253: return fixed_size_memory_copy<253>(dst, src); -B254: + B254: return fixed_size_memory_copy<254>(dst, src); -B255: + B255: return fixed_size_memory_copy<255>(dst, src); } #endif @@ -623,8 +606,8 @@ B255: return; } -} +} // namespace doris -#endif // DORIS_BE_SRC_COMMON_SRC_UTIL_MEM_UTIL_H +#endif // DORIS_BE_SRC_COMMON_SRC_UTIL_MEM_UTIL_H /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/be/src/util/mysql_global.h b/be/src/util/mysql_global.h index 0fb072cd86..634264bce0 100644 --- a/be/src/util/mysql_global.h +++ b/be/src/util/mysql_global.h @@ -37,18 +37,18 @@ typedef unsigned char uchar; #define MY_ALIGN(A, L) (((A) + (L)-1) & ~((L)-1)) #define SIZEOF_CHARP 8 -#define MAX_TINYINT_WIDTH 3 /* Max width for a TINY w.o. sign */ -#define MAX_SMALLINT_WIDTH 5 /* Max width for a SHORT w.o. sign */ -#define MAX_MEDIUMINT_WIDTH 8 /* Max width for a INT24 w.o. sign */ -#define MAX_INT_WIDTH 10 /* Max width for a LONG w.o. sign */ -#define MAX_BIGINT_WIDTH 20 /* Max width for a LONGLONG */ -#define MAX_LARGEINT_WIDTH 39 /* Max width for a LARGEINT */ -#define MAX_CHAR_WIDTH 255 /* Max length for a CHAR column */ +#define MAX_TINYINT_WIDTH 3 /* Max width for a TINY w.o. sign */ +#define MAX_SMALLINT_WIDTH 5 /* Max width for a SHORT w.o. sign */ +#define MAX_MEDIUMINT_WIDTH 8 /* Max width for a INT24 w.o. sign */ +#define MAX_INT_WIDTH 10 /* Max width for a LONG w.o. sign */ +#define MAX_BIGINT_WIDTH 20 /* Max width for a LONGLONG */ +#define MAX_LARGEINT_WIDTH 39 /* Max width for a LARGEINT */ +#define MAX_CHAR_WIDTH 255 /* Max length for a CHAR column */ #define MAX_BLOB_WIDTH 16777216 /* Default width for blob */ -#define MAX_TIME_WIDTH 10 /* Max width for a TIME HH:MM:SS*/ +#define MAX_TIME_WIDTH 10 /* Max width for a TIME HH:MM:SS*/ #define MAX_DECPT_FOR_F_FORMAT DBL_DIG #define MAX_DATETIME_WIDTH 27 /* YYYY-MM-DD HH:MM:SS.ssssss */ -#define MAX_DECIMAL_WIDTH 29 /* Max width for a DECIMAL */ +#define MAX_DECIMAL_WIDTH 29 /* Max width for a DECIMAL */ /* -[digits].E+## */ #define MAX_FLOAT_STR_LENGTH 24 // see gutil/strings/numbers.h kFloatToBufferSize diff --git a/be/src/util/path_trie.hpp b/be/src/util/path_trie.hpp index db309867f3..cca16b883d 100644 --- a/be/src/util/path_trie.hpp +++ b/be/src/util/path_trie.hpp @@ -25,14 +25,10 @@ namespace doris { // This tree is usd for manage restful api path. -template +template class PathTrie { public: - PathTrie() : - _root("/", "*"), - _root_value(nullptr), - _separator('/') { - }; + PathTrie() : _root("/", "*"), _root_value(nullptr), _separator('/') {}; ~PathTrie() { if (_root_value != nullptr) { @@ -43,17 +39,15 @@ public: class TrieNode { public: - TrieNode(const std::string& key, const std::string& wildcard) : - _value(nullptr), - _wildcard(wildcard) { + TrieNode(const std::string& key, const std::string& wildcard) + : _value(nullptr), _wildcard(wildcard) { if (is_named_wildcard(key)) { _named_wildcard = extract_template(key); } } - TrieNode(const std::string& key, const T& value, const std::string& wildcard) : - _value(nullptr), - _wildcard(wildcard) { + TrieNode(const std::string& key, const T& value, const std::string& wildcard) + : _value(nullptr), _wildcard(wildcard) { _value = _allocator.allocate(1); _allocator.construct(_value, value); if (is_named_wildcard(key)) { @@ -100,8 +94,7 @@ public: // If this is a template, set this to the node if (is_named_wildcard(token)) { std::string temp = extract_template(token); - if (node->_named_wildcard.empty() - || node->_named_wildcard.compare(temp) == 0) { + if (node->_named_wildcard.empty() || node->_named_wildcard.compare(temp) == 0) { node->_named_wildcard = temp; } else { // Duplicated @@ -121,8 +114,8 @@ public: return node->insert(path, index + 1, value); } - bool retrieve(const std::vector path, int index, - T* value, std::map* params) { + bool retrieve(const std::vector path, int index, T* value, + std::map* params) { // check max index if (index >= path.size()) { return false; @@ -132,15 +125,14 @@ public: TrieNode* node = get_child(token); if (node == nullptr) { node = get_child(_wildcard); - if (node == nullptr) { + if (node == nullptr) { return false; } use_wildcard = true; } else { // If we the last one, but we have no value, check wildcard - if (index == path.size() - 1 - && node->_value == nullptr - && get_child(_wildcard) != nullptr) { + if (index == path.size() - 1 && node->_value == nullptr && + get_child(_wildcard) != nullptr) { node = get_child(_wildcard); use_wildcard = true; } else { @@ -173,10 +165,10 @@ public: } return false; } + private: bool is_named_wildcard(const std::string& key) { - if (key.find('{') != std::string::npos - && key.find('}') != std::string::npos) { + if (key.find('{') != std::string::npos && key.find('}') != std::string::npos) { return true; } return false; @@ -196,8 +188,8 @@ public: return pair->second; } - void put(std::map* params, - TrieNode* node, const std::string& token) { + void put(std::map* params, TrieNode* node, + const std::string& token) { if (params != nullptr && !node->_named_wildcard.empty()) { params->insert(std::make_pair(node->_named_wildcard, token)); } @@ -229,12 +221,9 @@ public: return _root.insert(path_array, index, value); } - bool retrieve(const std::string& path, T* value) { - return retrieve(path, value, nullptr); - } + bool retrieve(const std::string& path, T* value) { return retrieve(path, value, nullptr); } - bool retrieve(const std::string& path, T* value, - std::map* params) { + bool retrieve(const std::string& path, T* value, std::map* params) { if (path.empty()) { if (_root_value == nullptr) { return false; @@ -284,4 +273,4 @@ private: std::allocator _allocator; }; -} +} // namespace doris diff --git a/be/src/util/priority_thread_pool.hpp b/be/src/util/priority_thread_pool.hpp index ed0fe90157..ba0712973b 100644 --- a/be/src/util/priority_thread_pool.hpp +++ b/be/src/util/priority_thread_pool.hpp @@ -115,8 +115,10 @@ public: shutdown(); join(); } + protected: virtual bool is_shutdown() { return _shutdown; } + private: // Driver method for each thread in the pool. Continues to read work from the queue // until the pool is shutdown. diff --git a/be/src/util/priority_work_stealing_thread_pool.hpp b/be/src/util/priority_work_stealing_thread_pool.hpp index 3c86098034..c2f717a18f 100644 --- a/be/src/util/priority_work_stealing_thread_pool.hpp +++ b/be/src/util/priority_work_stealing_thread_pool.hpp @@ -29,7 +29,6 @@ namespace doris { // blocking queues by Offer(). Each item is processed by a single user-supplied method. class PriorityWorkStealingThreadPool : public PriorityThreadPool { public: - // Creates a new thread pool and start num_threads threads. // -- num_threads: how many threads are part of this pool // -- num_queues: how many queues are part of this pool @@ -46,8 +45,8 @@ public: _work_queues.emplace_back(std::make_shared>(queue_size)); } for (int i = 0; i < num_threads; ++i) { - _threads.create_thread( - std::bind(std::mem_fn(&PriorityWorkStealingThreadPool::work_thread), this, i)); + _threads.create_thread(std::bind( + std::mem_fn(&PriorityWorkStealingThreadPool::work_thread), this, i)); } } @@ -62,9 +61,7 @@ public: // // Returns true if the work item was successfully added to the queue, false otherwise // (which typically means that the thread pool has already been shut down). - bool offer(Task task) override { - return _work_queues[task.queue_id]->blocking_put(task); - } + bool offer(Task task) override { return _work_queues[task.queue_id]->blocking_put(task); } bool offer(WorkFunction func) override { PriorityThreadPool::Task task = {0, func, 0}; @@ -119,7 +116,8 @@ private: // avoid blocking get bool is_other_queues_empty = true; // steal work in round-robin if nothing to do - while (_work_queues[queue_id]->get_size() == 0 && queue_id != steal_queue_id && !is_shutdown()) { + while (_work_queues[queue_id]->get_size() == 0 && queue_id != steal_queue_id && + !is_shutdown()) { if (_work_queues[steal_queue_id]->non_blocking_get(&task)) { is_other_queues_empty = false; task.work_function(); @@ -129,7 +127,9 @@ private: if (queue_id == steal_queue_id) { steal_queue_id = (steal_queue_id + 1) % _work_queues.size(); } - if (is_other_queues_empty && _work_queues[queue_id]->blocking_get(&task, config::doris_blocking_priority_queue_wait_timeout_ms)) { + if (is_other_queues_empty && + _work_queues[queue_id]->blocking_get( + &task, config::doris_blocking_priority_queue_wait_timeout_ms)) { task.work_function(); } if (_work_queues[queue_id]->get_size() == 0) { diff --git a/be/src/util/progress_updater.cpp b/be/src/util/progress_updater.cpp index 67d1156e44..f309d6971c 100644 --- a/be/src/util/progress_updater.cpp +++ b/be/src/util/progress_updater.cpp @@ -32,10 +32,7 @@ ProgressUpdater::ProgressUpdater(const std::string& label, int64_t total, int pe _last_output_percentage(0) {} ProgressUpdater::ProgressUpdater() - : _total(0), - _update_period(0), - _num_complete(0), - _last_output_percentage(0) {} + : _total(0), _update_period(0), _num_complete(0), _last_output_percentage(0) {} void ProgressUpdater::update(int64_t delta) { DCHECK_GE(delta, 0); @@ -53,8 +50,7 @@ void ProgressUpdater::update(int64_t delta) { if (num_complete >= _total) { // Always print the final 100% complete - VLOG_DEBUG << _label << " 100\% Complete (" << num_complete << " out of " - << _total << ")"; + VLOG_DEBUG << _label << " 100\% Complete (" << num_complete << " out of " << _total << ")"; return; } @@ -65,7 +61,7 @@ void ProgressUpdater::update(int64_t delta) { // Only update shared variable if this guy was the latest. __sync_val_compare_and_swap(&_last_output_percentage, old_percentage, new_percentage); VLOG_DEBUG << _label << ": " << new_percentage << "\% Complete (" << num_complete - << " out of " << _total << ")"; + << " out of " << _total << ")"; } } } // namespace doris diff --git a/be/src/util/proto_util.h b/be/src/util/proto_util.h index 52dcec1861..ee9213dc3a 100644 --- a/be/src/util/proto_util.h +++ b/be/src/util/proto_util.h @@ -25,7 +25,8 @@ namespace doris { // This can avoid reaching the upper limit of the ProtoBuf Request length (2G), // and it is expected that performance can be improved. template -inline void request_row_batch_transfer_attachment(Params* brpc_request, const std::string& tuple_data, Closure* closure) { +inline void request_row_batch_transfer_attachment(Params* brpc_request, + const std::string& tuple_data, Closure* closure) { auto row_batch = brpc_request->mutable_row_batch(); row_batch->set_tuple_data(""); brpc_request->set_transfer_by_attachment(true); @@ -38,7 +39,8 @@ inline void request_row_batch_transfer_attachment(Params* brpc_request, const st // This can avoid reaching the upper limit of the ProtoBuf Request length (2G), // and it is expected that performance can be improved. template -inline void request_block_transfer_attachment(Params* brpc_request, const std::string& column_values, Closure* closure) { +inline void request_block_transfer_attachment(Params* brpc_request, + const std::string& column_values, Closure* closure) { auto block = brpc_request->mutable_block(); block->set_column_values(""); brpc_request->set_transfer_by_attachment(true); @@ -49,7 +51,8 @@ inline void request_block_transfer_attachment(Params* brpc_request, const std::s // Controller Attachment transferred to RowBatch in ProtoBuf Request. template -inline void attachment_transfer_request_row_batch(const Params* brpc_request, brpc::Controller* cntl) { +inline void attachment_transfer_request_row_batch(const Params* brpc_request, + brpc::Controller* cntl) { Params* req = const_cast(brpc_request); if (req->has_row_batch() && req->transfer_by_attachment()) { auto rb = req->mutable_row_batch(); diff --git a/be/src/util/radix_sort.h b/be/src/util/radix_sort.h index eaa84876c0..e90345fdb7 100644 --- a/be/src/util/radix_sort.h +++ b/be/src/util/radix_sort.h @@ -56,7 +56,7 @@ using is_unsigned_v = typename std::is_unsigned::value; template decay_t bit_cast(const From& from) { - To res{}; + To res {}; memcpy(static_cast(&res), &from, std::min(sizeof(res), sizeof(from))); return res; } diff --git a/be/src/util/s3_storage_backend.cpp b/be/src/util/s3_storage_backend.cpp index 57047a2bf1..cb37d50963 100644 --- a/be/src/util/s3_storage_backend.cpp +++ b/be/src/util/s3_storage_backend.cpp @@ -126,8 +126,8 @@ Status S3StorageBackend::upload(const std::string& local, const std::string& rem RETRUN_S3_STATUS(response); } -Status S3StorageBackend::list(const std::string& remote_path, bool contain_md5, - bool recursion, std::map* files) { +Status S3StorageBackend::list(const std::string& remote_path, bool contain_md5, bool recursion, + std::map* files) { std::string normal_str(remote_path); if (!normal_str.empty() && normal_str.at(normal_str.size() - 1) != '/') { normal_str += '/'; @@ -156,7 +156,8 @@ Status S3StorageBackend::list(const std::string& remote_path, bool contain_md5, // Not found checksum separator, ignore this file continue; } - FileStat stat = {std::string(key, 0, pos), std::string(key, pos + 1), object.GetSize()}; + FileStat stat = {std::string(key, 0, pos), std::string(key, pos + 1), + object.GetSize()}; files->emplace(std::string(key, 0, pos), stat); } else { FileStat stat = {key, "", object.GetSize()}; @@ -221,7 +222,7 @@ Status S3StorageBackend::rmdir(const std::string& remote) { LOG(INFO) << "Remove S3 dir: " << remote; RETURN_IF_ERROR(list(normal_path, false, true, &files)); - for (auto &file : files) { + for (auto& file : files) { std::string file_path = normal_path + file.second.name; RETURN_IF_ERROR(rm(file_path)); } @@ -250,7 +251,7 @@ Status S3StorageBackend::copy_dir(const std::string& src, const std::string& dst LOG(WARNING) << "Nothing need to copy: " << src << " -> " << dst; return Status::OK(); } - for (auto &kv : files) { + for (auto& kv : files) { RETURN_IF_ERROR(copy(src + "/" + kv.first, dst + "/" + kv.first)); } return Status::OK(); diff --git a/be/src/util/s3_storage_backend.h b/be/src/util/s3_storage_backend.h index 2850adfcf9..b6d31c5593 100644 --- a/be/src/util/s3_storage_backend.h +++ b/be/src/util/s3_storage_backend.h @@ -36,8 +36,8 @@ public: Status upload(const std::string& local, const std::string& remote) override; Status upload_with_checksum(const std::string& local, const std::string& remote, const std::string& checksum) override; - Status list(const std::string& remote_path, bool contain_md5, - bool recursion, std::map* files) override; + Status list(const std::string& remote_path, bool contain_md5, bool recursion, + std::map* files) override; Status rename(const std::string& orig_name, const std::string& new_name) override; Status rename_dir(const std::string& orig_name, const std::string& new_name) override; Status direct_upload(const std::string& remote, const std::string& content) override; diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp index 9744168f7e..857788a667 100644 --- a/be/src/util/s3_uri.cpp +++ b/be/src/util/s3_uri.cpp @@ -54,7 +54,7 @@ bool S3URI::parse() { StripWhiteSpace(&_key); if (_key.empty()) { LOG(WARNING) << "Invalid S3 key: " << _location; - return false; + return false; } _bucket = authority_split[0]; // Strip query and fragment if they exist diff --git a/be/src/util/s3_uri.h b/be/src/util/s3_uri.h index 162e45ea12..dc6a07fd23 100644 --- a/be/src/util/s3_uri.h +++ b/be/src/util/s3_uri.h @@ -21,7 +21,6 @@ #include "util/string_util.h" - namespace doris { class S3URI { diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index 58fdc46a7a..feb19f2e3f 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -29,7 +29,7 @@ namespace doris { const static std::string USE_PATH_STYLE = "use_path_style"; ClientFactory::ClientFactory() { - _aws_options = Aws::SDKOptions{}; + _aws_options = Aws::SDKOptions {}; Aws::Utils::Logging::LogLevel logLevel = static_cast(config::aws_log_level); _aws_options.loggingOptions.logLevel = logLevel; @@ -80,10 +80,12 @@ std::shared_ptr ClientFactory::create( aws_config.maxConnections = std::atoi(properties.find(S3_MAX_CONN_SIZE)->second.c_str()); } if (properties.find(S3_REQUEST_TIMEOUT_MS) != properties.end()) { - aws_config.requestTimeoutMs = std::atoi(properties.find(S3_REQUEST_TIMEOUT_MS)->second.c_str()); + aws_config.requestTimeoutMs = + std::atoi(properties.find(S3_REQUEST_TIMEOUT_MS)->second.c_str()); } if (properties.find(S3_CONN_TIMEOUT_MS) != properties.end()) { - aws_config.connectTimeoutMs = std::atoi(properties.find(S3_CONN_TIMEOUT_MS)->second.c_str()); + aws_config.connectTimeoutMs = + std::atoi(properties.find(S3_CONN_TIMEOUT_MS)->second.c_str()); } // See https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_s3_client.html @@ -91,9 +93,9 @@ std::shared_ptr ClientFactory::create( if (properties.find(USE_PATH_STYLE) != properties.end()) { use_virtual_addressing = properties.find(USE_PATH_STYLE)->second == "true" ? false : true; } - return std::make_shared(std::move(aws_cred), std::move(aws_config), - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - use_virtual_addressing); + return std::make_shared( + std::move(aws_cred), std::move(aws_config), + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, use_virtual_addressing); } } // end namespace doris diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h index e4baa97d19..fd4dd17b75 100644 --- a/be/src/util/s3_util.h +++ b/be/src/util/s3_util.h @@ -48,6 +48,7 @@ public: std::shared_ptr create(const std::map& prop); static bool is_s3_conf_valid(const std::map& prop); + private: ClientFactory(); diff --git a/be/src/util/semaphore.hpp b/be/src/util/semaphore.hpp index dac287d604..7be2f40df7 100644 --- a/be/src/util/semaphore.hpp +++ b/be/src/util/semaphore.hpp @@ -23,28 +23,27 @@ namespace { class Semaphore { - public: - explicit Semaphore(int count = 0) : _count(count) { - } +public: + explicit Semaphore(int count = 0) : _count(count) {} - void set_count(int count) { _count = count; } + void set_count(int count) { _count = count; } - void signal() { - std::unique_lock lock(_mutex); - ++_count; - _cv.notify_one(); - } + void signal() { + std::unique_lock lock(_mutex); + ++_count; + _cv.notify_one(); + } - void wait() { - std::unique_lock lock(_mutex); - _cv.wait(lock, [=] { return _count > 0; }); - --_count; - } + void wait() { + std::unique_lock lock(_mutex); + _cv.wait(lock, [=] { return _count > 0; }); + --_count; + } - private: - std::mutex _mutex; - std::condition_variable _cv; - int _count; +private: + std::mutex _mutex; + std::condition_variable _cv; + int _count; }; } // end namespace diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h index d2e942349d..9bdedf99c3 100644 --- a/be/src/util/simd/bits.h +++ b/be/src/util/simd/bits.h @@ -31,8 +31,8 @@ namespace simd { inline uint32_t bytes32_mask_to_bits32_mask(const uint8_t* data) { #ifdef __AVX2__ auto zero32 = _mm256_setzero_si256(); - uint32_t mask = static_cast(_mm256_movemask_epi8(_mm256_cmpgt_epi8( - _mm256_loadu_si256(reinterpret_cast(data)), zero32))); + uint32_t mask = static_cast(_mm256_movemask_epi8( + _mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(data)), zero32))); #elif __SSE2__ auto zero16 = _mm_setzero_si128(); uint32_t mask = @@ -51,10 +51,9 @@ inline uint32_t bytes32_mask_to_bits32_mask(const uint8_t* data) { return mask; } -inline uint32_t bytes32_mask_to_bits32_mask(const bool * data) { +inline uint32_t bytes32_mask_to_bits32_mask(const bool* data) { return bytes32_mask_to_bits32_mask(reinterpret_cast(data)); } - } // namespace simd } // namespace doris \ No newline at end of file diff --git a/be/src/util/simd/lower_upper_impl.h b/be/src/util/simd/lower_upper_impl.h index c8a2572f7f..f3fd442931 100644 --- a/be/src/util/simd/lower_upper_impl.h +++ b/be/src/util/simd/lower_upper_impl.h @@ -30,8 +30,7 @@ namespace doris::simd { template class LowerUpperImpl { public: - static void transfer(const uint8_t * src, const uint8_t * src_end, uint8_t * dst) - { + static void transfer(const uint8_t* src, const uint8_t* src_end, uint8_t* dst) { const auto flip_case_mask = 'A' ^ 'a'; #ifdef __SSE2__ @@ -42,14 +41,13 @@ public: const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); - for (; src < src_end_sse; src += bytes_sse, dst += bytes_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(src)); - const auto is_not_case - = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); + for (; src < src_end_sse; src += bytes_sse, dst += bytes_sse) { + const auto chars = _mm_loadu_si128(reinterpret_cast(src)); + const auto is_not_case = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), + _mm_cmplt_epi8(chars, v_not_case_upper_bound)); const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); const auto cased_chars = _mm_xor_si128(chars, xor_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), cased_chars); } #endif @@ -60,4 +58,4 @@ public: *dst = *src; } }; -} +} // namespace doris::simd diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index c4b268fcf7..a74a4f0ca6 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -60,15 +60,15 @@ public: auto end = str.len - 1; #ifdef __SSE2__ char blank = ' '; - const auto pattern = _mm_set1_epi8(blank); + const auto pattern = _mm_set1_epi8(blank); while (end - begin + 1 >= REGISTER_SIZE) { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(str.ptr + end + 1 - REGISTER_SIZE)); + const auto v_haystack = _mm_loadu_si128( + reinterpret_cast(str.ptr + end + 1 - REGISTER_SIZE)); const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern); const auto mask = _mm_movemask_epi8(v_against_pattern); int offset = __builtin_clz(~(mask << REGISTER_SIZE)); /// means not found - if (offset == 0) - { + if (offset == 0) { return StringVal(str.ptr + begin, end - begin + 1); } else { end -= offset; @@ -92,9 +92,10 @@ public: auto end = str.len - 1; #ifdef __SSE2__ char blank = ' '; - const auto pattern = _mm_set1_epi8(blank); + const auto pattern = _mm_set1_epi8(blank); while (end - begin + 1 >= REGISTER_SIZE) { - const auto v_haystack = _mm_loadu_si128(reinterpret_cast(str.ptr + begin)); + const auto v_haystack = + _mm_loadu_si128(reinterpret_cast(str.ptr + begin)); const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern); const auto mask = _mm_movemask_epi8(v_against_pattern) ^ 0xffff; /// zero means not found @@ -156,13 +157,15 @@ public: #if defined(__SSE2__) constexpr auto step = sizeof(uint64); if (src_str + step < src_str_end) { - const auto hex_map = _mm_loadu_si128(reinterpret_cast(hex_table)); + const auto hex_map = _mm_loadu_si128(reinterpret_cast(hex_table)); const auto mask_map = _mm_set1_epi8(0x0F); do { auto data = _mm_loadu_si64(src_str); - auto hex_loc = _mm_and_si128(_mm_unpacklo_epi8(_mm_srli_epi64(data, 4), data), mask_map); - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst_str), _mm_shuffle_epi8(hex_map, hex_loc)); + auto hex_loc = + _mm_and_si128(_mm_unpacklo_epi8(_mm_srli_epi64(data, 4), data), mask_map); + _mm_storeu_si128(reinterpret_cast<__m128i*>(dst_str), + _mm_shuffle_epi8(hex_map, hex_loc)); src_str += step; dst_str += step * 2; @@ -180,7 +183,7 @@ public: } } - static void to_lower(uint8_t * src, int64_t len, uint8_t * dst) { + static void to_lower(uint8_t* src, int64_t len, uint8_t* dst) { if (len <= 0) { return; } @@ -188,7 +191,7 @@ public: lowerUpper.transfer(src, src + len, dst); } - static void to_upper(uint8_t * src, int64_t len, uint8_t * dst) { + static void to_upper(uint8_t* src, int64_t len, uint8_t* dst) { if (len <= 0) { return; } @@ -196,5 +199,5 @@ public: lowerUpper.transfer(src, src + len, dst); } }; -} -} +} // namespace simd +} // namespace doris diff --git a/be/src/util/sort_heap.h b/be/src/util/sort_heap.h index 2bbba555ee..c7b95ae3c2 100644 --- a/be/src/util/sort_heap.h +++ b/be/src/util/sort_heap.h @@ -112,7 +112,7 @@ private: ++child_idx; } - /// Check if we are in order. + /// Check if we are in order. } while (!(_comp(*child_it, top))); *curr_it = top; } diff --git a/be/src/util/sse_util.hpp b/be/src/util/sse_util.hpp index db99218db9..3cabc27e1a 100644 --- a/be/src/util/sse_util.hpp +++ b/be/src/util/sse_util.hpp @@ -49,30 +49,15 @@ static const int STRCHR_MODE = _SIDD_CMP_EQUAL_ANY | _SIDD_UBYTE_OPS; // In this mode, sse text processing functions will return the number of bytes that match // consecutively from the beginning. -static const int STRCMP_MODE = _SIDD_CMP_EQUAL_EACH | _SIDD_UBYTE_OPS - | _SIDD_NEGATIVE_POLARITY; +static const int STRCMP_MODE = _SIDD_CMP_EQUAL_EACH | _SIDD_UBYTE_OPS | _SIDD_NEGATIVE_POLARITY; // Precomputed mask values up to 16 bits. static const int SSE_BITMASK[CHARS_PER_128_BIT_REGISTER] = { - 1 << 0, - 1 << 1, - 1 << 2, - 1 << 3, - 1 << 4, - 1 << 5, - 1 << 6, - 1 << 7, - 1 << 8, - 1 << 9, - 1 << 10, - 1 << 11, - 1 << 12, - 1 << 13, - 1 << 14, - 1 << 15, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 8, 1 << 9, 1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, }; -} -} +} // namespace sse_util +} // namespace doris #endif diff --git a/be/src/util/stat_util.hpp b/be/src/util/stat_util.hpp index e1d98568f1..6a6c3f50bd 100644 --- a/be/src/util/stat_util.hpp +++ b/be/src/util/stat_util.hpp @@ -49,6 +49,6 @@ public: } }; -} +} // namespace doris #endif diff --git a/be/src/util/storage_backend.h b/be/src/util/storage_backend.h index a05acb21ba..d18b4e73b0 100644 --- a/be/src/util/storage_backend.h +++ b/be/src/util/storage_backend.h @@ -34,8 +34,8 @@ public: virtual Status upload(const std::string& local, const std::string& remote) = 0; virtual Status upload_with_checksum(const std::string& local, const std::string& remote, const std::string& checksum) = 0; - virtual Status list(const std::string& remote_path, bool contain_md5, - bool recursion, std::map* files) = 0; + virtual Status list(const std::string& remote_path, bool contain_md5, bool recursion, + std::map* files) = 0; virtual Status rename(const std::string& orig_name, const std::string& new_name) = 0; virtual Status rename_dir(const std::string& orig_name, const std::string& new_name) = 0; virtual Status direct_upload(const std::string& remote, const std::string& content) = 0; diff --git a/be/src/util/storage_backend_mgr.cpp b/be/src/util/storage_backend_mgr.cpp index 53a49c1d5d..ab907e114f 100644 --- a/be/src/util/storage_backend_mgr.cpp +++ b/be/src/util/storage_backend_mgr.cpp @@ -111,25 +111,25 @@ Status StorageBackendMgr::_create_remote_storage_internal(const StorageParamPB& } std::map storage_prop; switch (storage_param_pb.storage_medium()) { - case StorageMediumPB::S3: - default: - S3StorageParamPB s3_storage_param = storage_param_pb.s3_storage_param(); - if (s3_storage_param.s3_ak().empty() || s3_storage_param.s3_sk().empty() - || s3_storage_param.s3_endpoint().empty() || s3_storage_param.s3_region().empty()) { - return Status::InternalError("s3_storage_param param is invalid"); - } - storage_prop[S3_AK] = s3_storage_param.s3_ak(); - storage_prop[S3_SK] = s3_storage_param.s3_sk(); - storage_prop[S3_ENDPOINT] = s3_storage_param.s3_endpoint(); - storage_prop[S3_REGION] = s3_storage_param.s3_region(); - storage_prop[S3_MAX_CONN_SIZE] = s3_storage_param.s3_max_conn(); - storage_prop[S3_REQUEST_TIMEOUT_MS] = s3_storage_param.s3_request_timeout_ms(); - storage_prop[S3_CONN_TIMEOUT_MS] = s3_storage_param.s3_conn_timeout_ms(); + case StorageMediumPB::S3: + default: + S3StorageParamPB s3_storage_param = storage_param_pb.s3_storage_param(); + if (s3_storage_param.s3_ak().empty() || s3_storage_param.s3_sk().empty() || + s3_storage_param.s3_endpoint().empty() || s3_storage_param.s3_region().empty()) { + return Status::InternalError("s3_storage_param param is invalid"); + } + storage_prop[S3_AK] = s3_storage_param.s3_ak(); + storage_prop[S3_SK] = s3_storage_param.s3_sk(); + storage_prop[S3_ENDPOINT] = s3_storage_param.s3_endpoint(); + storage_prop[S3_REGION] = s3_storage_param.s3_region(); + storage_prop[S3_MAX_CONN_SIZE] = s3_storage_param.s3_max_conn(); + storage_prop[S3_REQUEST_TIMEOUT_MS] = s3_storage_param.s3_request_timeout_ms(); + storage_prop[S3_CONN_TIMEOUT_MS] = s3_storage_param.s3_conn_timeout_ms(); - if (!ClientFactory::is_s3_conf_valid(storage_prop)) { - return Status::InternalError("s3_storage_param is invalid"); - } - _storage_backend_map[storage_name] = std::make_shared(storage_prop); + if (!ClientFactory::is_s3_conf_valid(storage_prop)) { + return Status::InternalError("s3_storage_param is invalid"); + } + _storage_backend_map[storage_name] = std::make_shared(storage_prop); } _storage_param_map[storage_name] = storage_param_pb; _storage_backend_active_time[storage_name] = time(nullptr); @@ -168,11 +168,10 @@ Status StorageBackendMgr::get_root_path(const std::string& storage_name, std::st std::string StorageBackendMgr::get_root_path_from_param(const StorageParamPB& storage_param) { switch (storage_param.storage_medium()) { - case StorageMediumPB::S3: - default: - { - return storage_param.s3_storage_param().root_path(); - } + case StorageMediumPB::S3: + default: { + return storage_param.s3_storage_param().root_path(); + } } } diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp index eecfacce32..bb321c081b 100644 --- a/be/src/util/string_parser.hpp +++ b/be/src/util/string_parser.hpp @@ -55,14 +55,9 @@ namespace doris { // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] class StringParser { public: - enum ParseResult { - PARSE_SUCCESS = 0, - PARSE_FAILURE, - PARSE_OVERFLOW, - PARSE_UNDERFLOW - }; + enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; - template + template class StringParseTraits { public: /// Returns the maximum ascii string length for this type. @@ -70,7 +65,7 @@ public: static int max_ascii_len(); }; - template + template static T numeric_limits(bool negative); static inline __int128 get_scale_multiplier(int scale); @@ -81,7 +76,7 @@ public: template static inline T string_to_int(const char* s, int len, ParseResult* result) { T ans = string_to_int_internal(s, len, result); - if (LIKELY(*result == PARSE_SUCCESS)){ + if (LIKELY(*result == PARSE_SUCCESS)) { return ans; } @@ -95,7 +90,7 @@ public: template static inline T string_to_unsigned_int(const char* s, int len, ParseResult* result) { T ans = string_to_unsigned_int_internal(s, len, result); - if (LIKELY(*result == PARSE_SUCCESS)){ + if (LIKELY(*result == PARSE_SUCCESS)) { return ans; } @@ -118,7 +113,7 @@ public: template static inline T string_to_float(const char* s, int len, ParseResult* result) { T ans = string_to_float_internal(s, len, result); - if (LIKELY(*result == PARSE_SUCCESS)){ + if (LIKELY(*result == PARSE_SUCCESS)) { return ans; } @@ -129,7 +124,7 @@ public: // Parses a string for 'true' or 'false', case insensitive. static inline bool string_to_bool(const char* s, int len, ParseResult* result) { bool ans = string_to_bool_internal(s, len, result); - if (LIKELY(*result == PARSE_SUCCESS)){ + if (LIKELY(*result == PARSE_SUCCESS)) { return ans; } @@ -142,7 +137,7 @@ public: template static Status split_string_to_map(const std::string& base, const T element_separator, - const T key_value_separator, + const T key_value_separator, std::map* result) { int key_pos = 0; int key_end; @@ -167,6 +162,7 @@ public: return Status::OK(); } + private: // This is considerably faster than glibc's implementation. // In the case of overflow, the max/min value for the data type will be returned. @@ -220,7 +216,7 @@ private: // Returns the position of the first non-whitespace character in s. static inline int skip_leading_whitespace(const char* s, int len) { int i = 0; - while(i < len && is_whitespace(s[i])) { + while (i < len && is_whitespace(s[i])) { ++i; } return i; @@ -228,8 +224,8 @@ private: // Our own definition of "isspace" that optimize on the ' ' branch. static inline bool is_whitespace(const char& c) { - return LIKELY(c == ' ') - || UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); + return LIKELY(c == ' ') || + UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); } }; // end of class StringParser @@ -290,7 +286,8 @@ inline T StringParser::string_to_int_internal(const char* s, int len, ParseResul } template -inline T StringParser::string_to_unsigned_int_internal(const char* s, int len, ParseResult* result) { +inline T StringParser::string_to_unsigned_int_internal(const char* s, int len, + ParseResult* result) { if (UNLIKELY(len <= 0)) { *result = PARSE_FAILURE; return 0; @@ -337,8 +334,8 @@ inline T StringParser::string_to_unsigned_int_internal(const char* s, int len, P } template -inline T StringParser::string_to_int_internal( - const char* s, int len, int base, ParseResult* result) { +inline T StringParser::string_to_int_internal(const char* s, int len, int base, + ParseResult* result) { typedef typename std::make_unsigned::type UnsignedT; UnsignedT val = 0; UnsignedT max_val = StringParser::numeric_limits(false); @@ -349,10 +346,11 @@ inline T StringParser::string_to_int_internal( } int i = 0; switch (*s) { - case '-': - negative = true; - max_val = StringParser::numeric_limits(false) + 1; - case '+': i = 1; + case '-': + negative = true; + max_val = StringParser::numeric_limits(false) + 1; + case '+': + i = 1; } const T max_div_base = max_val / base; @@ -454,7 +452,7 @@ inline T StringParser::string_to_float_internal(const char* s, int len, ParseRes int first = i; for (; i < len; ++i) { if (LIKELY(s[i] >= '0' && s[i] <= '9')) { - if (s[i] != '0' || sig_figs > 0){ + if (s[i] != '0' || sig_figs > 0) { ++sig_figs; } if (decimal) { @@ -476,9 +474,8 @@ inline T StringParser::string_to_float_internal(const char* s, int len, ParseRes } else if (s[i] == 'e' || s[i] == 'E') { break; } else if (s[i] == 'i' || s[i] == 'I') { - if (len > i + 2 - && (s[i + 1] == 'n' || s[i + 1] == 'N') - && (s[i + 2] == 'f' || s[i + 2] == 'F')) { + if (len > i + 2 && (s[i + 1] == 'n' || s[i + 1] == 'N') && + (s[i + 2] == 'f' || s[i + 2] == 'F')) { // Note: Hive writes inf as Infinity, at least for text. We'll be a little loose // here and interpret any column with inf as a prefix as infinity rather than // checking every remaining byte. @@ -491,7 +488,7 @@ inline T StringParser::string_to_float_internal(const char* s, int len, ParseRes } } else if (s[i] == 'n' || s[i] == 'N') { if (len > i + 2 && (s[i + 1] == 'a' || s[i + 1] == 'A') && - (s[i + 2] == 'n' || s[i + 2] == 'N')) { + (s[i + 2] == 'n' || s[i + 2] == 'N')) { *result = PARSE_SUCCESS; return negative ? -NAN : NAN; } else { @@ -546,18 +543,15 @@ inline bool StringParser::string_to_bool_internal(const char* s, int len, ParseR *result = PARSE_SUCCESS; if (len >= 4 && (s[0] == 't' || s[0] == 'T')) { - bool match = (s[1] == 'r' || s[1] == 'R') && - (s[2] == 'u' || s[2] == 'U') && - (s[3] == 'e' || s[3] == 'E'); + bool match = (s[1] == 'r' || s[1] == 'R') && (s[2] == 'u' || s[2] == 'U') && + (s[3] == 'e' || s[3] == 'E'); if (match && LIKELY(is_all_whitespace(s + 4, len - 4))) { return true; } } else if (len >= 5 && (s[0] == 'f' || s[0] == 'F')) { - bool match = (s[1] == 'a' || s[1] == 'A') && - (s[2] == 'l' || s[2] == 'L') && - (s[3] == 's' || s[3] == 'S') && - (s[4] == 'e' || s[4] == 'E'); - if (match && LIKELY(is_all_whitespace(s + 5, len - 5))){ + bool match = (s[1] == 'a' || s[1] == 'A') && (s[2] == 'l' || s[2] == 'L') && + (s[3] == 's' || s[3] == 'S') && (s[4] == 'e' || s[4] == 'E'); + if (match && LIKELY(is_all_whitespace(s + 5, len - 5))) { return false; } } @@ -566,55 +560,55 @@ inline bool StringParser::string_to_bool_internal(const char* s, int len, ParseR return false; } -template<> +template <> __int128 StringParser::numeric_limits<__int128>(bool negative); -template +template T StringParser::numeric_limits(bool negative) { return negative ? std::numeric_limits::min() : std::numeric_limits::max(); } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 3; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 5; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 10; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 20; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 3; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 5; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 10; } -template<> +template <> inline int StringParser::StringParseTraits::max_ascii_len() { return 19; } -template<> +template <> inline int StringParser::StringParseTraits<__int128>::max_ascii_len() { return 39; } @@ -622,53 +616,53 @@ inline int StringParser::StringParseTraits<__int128>::max_ascii_len() { inline __int128 StringParser::get_scale_multiplier(int scale) { DCHECK_GE(scale, 0); static const __int128 values[] = { - static_cast<__int128>(1ll), - static_cast<__int128>(10ll), - static_cast<__int128>(100ll), - static_cast<__int128>(1000ll), - static_cast<__int128>(10000ll), - static_cast<__int128>(100000ll), - static_cast<__int128>(1000000ll), - static_cast<__int128>(10000000ll), - static_cast<__int128>(100000000ll), - static_cast<__int128>(1000000000ll), - static_cast<__int128>(10000000000ll), - static_cast<__int128>(100000000000ll), - static_cast<__int128>(1000000000000ll), - static_cast<__int128>(10000000000000ll), - static_cast<__int128>(100000000000000ll), - static_cast<__int128>(1000000000000000ll), - static_cast<__int128>(10000000000000000ll), - static_cast<__int128>(100000000000000000ll), - static_cast<__int128>(1000000000000000000ll), - static_cast<__int128>(1000000000000000000ll) * 10ll, - static_cast<__int128>(1000000000000000000ll) * 100ll, - static_cast<__int128>(1000000000000000000ll) * 1000ll, - static_cast<__int128>(1000000000000000000ll) * 10000ll, - static_cast<__int128>(1000000000000000000ll) * 100000ll, - static_cast<__int128>(1000000000000000000ll) * 1000000ll, - static_cast<__int128>(1000000000000000000ll) * 10000000ll, - static_cast<__int128>(1000000000000000000ll) * 100000000ll, - static_cast<__int128>(1000000000000000000ll) * 1000000000ll, - static_cast<__int128>(1000000000000000000ll) * 10000000000ll, - static_cast<__int128>(1000000000000000000ll) * 100000000000ll, - static_cast<__int128>(1000000000000000000ll) * 1000000000000ll, - static_cast<__int128>(1000000000000000000ll) * 10000000000000ll, - static_cast<__int128>(1000000000000000000ll) * 100000000000000ll, - static_cast<__int128>(1000000000000000000ll) * 1000000000000000ll, - static_cast<__int128>(1000000000000000000ll) * 10000000000000000ll, - static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll, - static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 10ll, - static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 100ll, - static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 1000ll}; + static_cast<__int128>(1ll), + static_cast<__int128>(10ll), + static_cast<__int128>(100ll), + static_cast<__int128>(1000ll), + static_cast<__int128>(10000ll), + static_cast<__int128>(100000ll), + static_cast<__int128>(1000000ll), + static_cast<__int128>(10000000ll), + static_cast<__int128>(100000000ll), + static_cast<__int128>(1000000000ll), + static_cast<__int128>(10000000000ll), + static_cast<__int128>(100000000000ll), + static_cast<__int128>(1000000000000ll), + static_cast<__int128>(10000000000000ll), + static_cast<__int128>(100000000000000ll), + static_cast<__int128>(1000000000000000ll), + static_cast<__int128>(10000000000000000ll), + static_cast<__int128>(100000000000000000ll), + static_cast<__int128>(1000000000000000000ll), + static_cast<__int128>(1000000000000000000ll) * 10ll, + static_cast<__int128>(1000000000000000000ll) * 100ll, + static_cast<__int128>(1000000000000000000ll) * 1000ll, + static_cast<__int128>(1000000000000000000ll) * 10000ll, + static_cast<__int128>(1000000000000000000ll) * 100000ll, + static_cast<__int128>(1000000000000000000ll) * 1000000ll, + static_cast<__int128>(1000000000000000000ll) * 10000000ll, + static_cast<__int128>(1000000000000000000ll) * 100000000ll, + static_cast<__int128>(1000000000000000000ll) * 1000000000ll, + static_cast<__int128>(1000000000000000000ll) * 10000000000ll, + static_cast<__int128>(1000000000000000000ll) * 100000000000ll, + static_cast<__int128>(1000000000000000000ll) * 1000000000000ll, + static_cast<__int128>(1000000000000000000ll) * 10000000000000ll, + static_cast<__int128>(1000000000000000000ll) * 100000000000000ll, + static_cast<__int128>(1000000000000000000ll) * 1000000000000000ll, + static_cast<__int128>(1000000000000000000ll) * 10000000000000000ll, + static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll, + static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 10ll, + static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 100ll, + static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 1000ll}; if (scale >= 0 && scale < 39) { return values[scale]; } - return -1; // Overflow + return -1; // Overflow } -inline __int128 StringParser::string_to_decimal(const char* s, int len, - int type_precision, int type_scale, ParseResult* result) { +inline __int128 StringParser::string_to_decimal(const char* s, int len, int type_precision, + int type_scale, ParseResult* result) { // Special cases: // 1) '' == Fail, an empty string fails to parse. // 2) ' # ' == #, leading and trailing white space is ignored. @@ -687,11 +681,11 @@ inline __int128 StringParser::string_to_decimal(const char* s, int len, bool is_negative = false; if (len > 0) { switch (*s) { - case '-': - is_negative = true; - case '+': - ++s; - --len; + case '-': + is_negative = true; + case '+': + ++s; + --len; } } @@ -733,9 +727,9 @@ inline __int128 StringParser::string_to_decimal(const char* s, int len, // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and // an exponent will be made later. if (LIKELY(type_precision > precision)) { - value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... + value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... } - DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. + DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. ++precision; scale += found_dot; } else if (c == '.' && LIKELY(!found_dot)) { diff --git a/be/src/util/tdigest.h b/be/src/util/tdigest.h index 2955a50f94..85cfe7c477 100644 --- a/be/src/util/tdigest.h +++ b/be/src/util/tdigest.h @@ -430,7 +430,7 @@ public: } void add(std::vector::const_iterator iter, - std::vector::const_iterator end) { + std::vector::const_iterator end) { while (iter != end) { const size_t diff = std::distance(iter, end); const size_t room = _max_unprocessed - _unprocessed.size(); diff --git a/be/src/util/time.h b/be/src/util/time.h index eee4880d85..f542b349d8 100644 --- a/be/src/util/time.h +++ b/be/src/util/time.h @@ -27,7 +27,7 @@ #include #define NANOS_PER_SEC 1000000000ll -#define NANOS_PER_MILLIS 1000000ll +#define NANOS_PER_MILLIS 1000000ll #define NANOS_PER_MICRO 1000ll #define MICROS_PER_SEC 1000000ll #define MICROS_PER_MILLI 1000ll diff --git a/be/src/util/topn_counter.cpp b/be/src/util/topn_counter.cpp index bb6052fe78..9f145fdff4 100644 --- a/be/src/util/topn_counter.cpp +++ b/be/src/util/topn_counter.cpp @@ -40,8 +40,8 @@ void TopNCounter::serialize(std::string* buffer) { PTopNCounter topn_counter; topn_counter.set_top_num(_top_num); topn_counter.set_space_expand_rate(_space_expand_rate); - for(std::vector::const_iterator it = _counter_vec->begin(); it != _counter_vec->end(); ++it) - { + for (std::vector::const_iterator it = _counter_vec->begin(); it != _counter_vec->end(); + ++it) { PCounter* counter = topn_counter.add_counter(); counter->set_item(it->get_item()); counter->set_count(it->get_count()); @@ -49,7 +49,7 @@ void TopNCounter::serialize(std::string* buffer) { topn_counter.SerializeToString(buffer); } -bool TopNCounter::deserialize(const doris::Slice &src) { +bool TopNCounter::deserialize(const doris::Slice& src) { PTopNCounter topn_counter; if (!topn_counter.ParseFromArray(src.data, src.size)) { LOG(WARNING) << "topn counter deserialize failed"; @@ -60,7 +60,8 @@ bool TopNCounter::deserialize(const doris::Slice &src) { set_top_num(topn_counter.top_num()); for (int i = 0; i < topn_counter.counter_size(); ++i) { const PCounter& counter = topn_counter.counter(i); - _counter_map->insert(std::make_pair(counter.item(), Counter(counter.item(), counter.count()))); + _counter_map->insert( + std::make_pair(counter.item(), Counter(counter.item(), counter.count()))); _counter_vec->emplace_back(counter.item(), counter.count()); } _ordered = true; @@ -74,14 +75,15 @@ void TopNCounter::sort_retain(uint32_t capacity) { } void TopNCounter::sort_retain(uint32_t capacity, std::vector* sort_vec) { - for(std::unordered_map::const_iterator it = _counter_map->begin(); it != _counter_map->end(); ++it) { + for (std::unordered_map::const_iterator it = _counter_map->begin(); + it != _counter_map->end(); ++it) { sort_vec->emplace_back(it->second.get_item(), it->second.get_count()); } std::sort(sort_vec->begin(), sort_vec->end(), TopNComparator()); if (sort_vec->size() > capacity) { for (uint32_t i = 0, n = sort_vec->size() - capacity; i < n; ++i) { - auto &counter = sort_vec->back(); + auto& counter = sort_vec->back(); _counter_map->erase(counter.get_item()); sort_vec->pop_back(); } @@ -90,7 +92,7 @@ void TopNCounter::sort_retain(uint32_t capacity, std::vector* sort_vec) // Based on the parallel version of the Space Saving algorithm as described in: // A parallel space saving algorithm for frequent items and the Hurwitz zeta distribution by Massimo Cafaro, et al. -void TopNCounter::merge(doris::TopNCounter &&other) { +void TopNCounter::merge(doris::TopNCounter&& other) { if (other._counter_map->size() == 0) { return; } @@ -104,18 +106,19 @@ void TopNCounter::merge(doris::TopNCounter &&other) { uint64_t m2 = another_full ? other._counter_vec->back().get_count() : 0; if (another_full == true) { - for (auto &entry : *(this->_counter_map)) { + for (auto& entry : *(this->_counter_map)) { entry.second.add_count(m2); } } - for (auto &other_entry : *(other._counter_map)) { + for (auto& other_entry : *(other._counter_map)) { auto itr = this->_counter_map->find(other_entry.first); if (itr != _counter_map->end()) { itr->second.add_count(other_entry.second.get_count() - m2); } else { - this->_counter_map->insert(std::make_pair(other_entry.first, - Counter(other_entry.first,other_entry.second.get_count() + m1))); + this->_counter_map->insert(std::make_pair( + other_entry.first, + Counter(other_entry.first, other_entry.second.get_count() + m1))); } } _ordered = false; @@ -131,7 +134,8 @@ void TopNCounter::finalize(std::string& finalize_str) { rapidjson::Writer writer(buffer); uint32_t k = 0; writer.StartObject(); - for (std::vector::const_iterator it = _counter_vec->begin(); it != _counter_vec->end() && k < _top_num; ++it, ++k) { + for (std::vector::const_iterator it = _counter_vec->begin(); + it != _counter_vec->end() && k < _top_num; ++it, ++k) { writer.Key(it->get_item().data()); writer.Uint64(it->get_count()); } @@ -139,4 +143,4 @@ void TopNCounter::finalize(std::string& finalize_str) { finalize_str = buffer.GetString(); } -} +} // namespace doris diff --git a/be/src/util/tuple_row_zorder_compare.cpp b/be/src/util/tuple_row_zorder_compare.cpp index cd27c6b5cf..4f66a60ded 100644 --- a/be/src/util/tuple_row_zorder_compare.cpp +++ b/be/src/util/tuple_row_zorder_compare.cpp @@ -19,216 +19,214 @@ namespace doris { - RowComparator::RowComparator(Schema* schema) { - - } - - int RowComparator::operator()(const char* left, const char* right) const { - return -1; - } - - TupleRowZOrderComparator::TupleRowZOrderComparator() { - _schema = nullptr; - _sort_col_num = 0; - } - - TupleRowZOrderComparator::TupleRowZOrderComparator(int sort_col_num) { - _schema = nullptr; - _sort_col_num = sort_col_num; - } - - TupleRowZOrderComparator::TupleRowZOrderComparator(Schema* schema, int sort_col_num) - :_schema(schema), _sort_col_num(sort_col_num) { - _max_col_size = get_type_byte_size(_schema->column(0)->type()); - for (size_t i = 1; i < _sort_col_num; ++i) { - if (_max_col_size < get_type_byte_size(_schema->column(i)->type())) { - _max_col_size = get_type_byte_size(_schema->column(i)->type()); - } - } - } - - int TupleRowZOrderComparator::compare(const char* lhs, const char* rhs) const { - ContiguousRow lhs_row(_schema, lhs); - ContiguousRow rhs_row(_schema, rhs); - if (_max_col_size <= 4) { - return compare_based_on_size(lhs_row, rhs_row); - } else if (_max_col_size <= 8) { - return compare_based_on_size(lhs_row, rhs_row); - } else { - return compare_based_on_size(lhs_row, rhs_row); - } - } - - void TupleRowZOrderComparator::max_col_size(const RowCursor& rc) { - _max_col_size = get_type_byte_size(rc.schema()->column(0)->type()); - for (size_t i = 1; i < _sort_col_num; ++i) { - if (_max_col_size < get_type_byte_size(rc.schema()->column(i)->type())) { - _max_col_size = get_type_byte_size(rc.schema()->column(i)->type()); - } - } - } - - int TupleRowZOrderComparator::compare_row(const RowCursor& lhs, const RowCursor& rhs) { - max_col_size(lhs); - if (_max_col_size <= 4) { - return compare_based_on_size(lhs, rhs); - } else if (_max_col_size <= 8) { - return compare_based_on_size(lhs, lhs); - } else { - return compare_based_on_size(lhs, lhs); - } - } - - template - int TupleRowZOrderComparator::compare_based_on_size(LhsRowType& lhs, LhsRowType& rhs) const { - auto less_msb = [](U x, U y) { return x < y && x < (x ^ y); }; - FieldType type = lhs.schema()->column(0)->type(); - U msd_lhs = get_shared_representation(lhs.cell(0).is_null() ? nullptr : lhs.cell(0).cell_ptr(), - type); - U msd_rhs = get_shared_representation(rhs.cell(0).is_null() ? nullptr : rhs.cell(0).cell_ptr(), - type); - for (int i = 1; i < _sort_col_num; ++i) { - type = lhs.schema()->column(i)->type(); - const void *lhs_v = lhs.cell(i).is_null() ? nullptr : lhs.cell(i).cell_ptr(); - const void *rhs_v = rhs.cell(i).is_null() ? nullptr : rhs.cell(i).cell_ptr(); - U lhsi = get_shared_representation(lhs_v, type); - U rhsi = get_shared_representation(rhs_v, type); - if (less_msb(msd_lhs ^ msd_rhs, lhsi ^ rhsi)) { - msd_lhs = lhsi; - msd_rhs = rhsi; - } - } - return msd_lhs < msd_rhs ? -1 : (msd_lhs > msd_rhs ? 1 : 0); - } - - template - U TupleRowZOrderComparator::get_shared_representation(const void *val, FieldType type) const { - // The mask used for setting the sign bit correctly. - if (val == NULL) return 0; - constexpr U mask = (U) 1 << (sizeof(U) * 8 - 1); - switch (type) { - case FieldType::OLAP_FIELD_TYPE_NONE: - return 0; - case FieldType::OLAP_FIELD_TYPE_BOOL: - return static_cast(*reinterpret_cast(val)) << (sizeof(U) * 8 - 1); - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: - case FieldType::OLAP_FIELD_TYPE_TINYINT: - return get_shared_int_representation( - *reinterpret_cast(val), mask); - case FieldType::OLAP_FIELD_TYPE_SMALLINT: - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: - return get_shared_int_representation( - *reinterpret_cast(val), mask); - case FieldType::OLAP_FIELD_TYPE_INT: - return get_shared_int_representation( - *reinterpret_cast(val), mask); - case FieldType::OLAP_FIELD_TYPE_DATETIME: - case FieldType::OLAP_FIELD_TYPE_DATE: - case FieldType::OLAP_FIELD_TYPE_BIGINT: - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: - return get_shared_int_representation( - *reinterpret_cast(val), mask); - case FieldType::OLAP_FIELD_TYPE_LARGEINT: - return static_cast(*reinterpret_cast(val)) ^ mask; - case FieldType::OLAP_FIELD_TYPE_FLOAT: - return get_shared_float_representation(val, mask); - case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: - case FieldType::OLAP_FIELD_TYPE_DOUBLE: - return get_shared_float_representation(val, mask); - case FieldType::OLAP_FIELD_TYPE_CHAR: - case FieldType::OLAP_FIELD_TYPE_VARCHAR:{ - const StringValue *string_value = reinterpret_cast(val); - return get_shared_string_representation(string_value->ptr, string_value->len); - } - case FieldType::OLAP_FIELD_TYPE_DECIMAL: { - decimal12_t decimal_val = *reinterpret_cast(val); - int128_t value = decimal_val.integer*DecimalV2Value::ONE_BILLION + decimal_val.fraction; - return static_cast(value) ^ mask; - } - default: - return 0; - } - } - - template - U TupleRowZOrderComparator::get_shared_int_representation(const T val, U mask) const { - uint64_t shift_size = static_cast( - std::max(static_cast((sizeof(U) - sizeof(T)) * 8), (int64_t) 0)); - return (static_cast(val) << shift_size) ^ mask; - } - - template - U TupleRowZOrderComparator::get_shared_float_representation(const void *val, U mask) const { - int64_t tmp; - T floating_value = *reinterpret_cast(val); - memcpy(&tmp, &floating_value, sizeof(T)); - if (UNLIKELY(std::isnan(floating_value))) return 0; - // "int" is enough because U and T are only primitive type - int s = (int)((sizeof(U) - sizeof(T)) * 8); - if (floating_value < 0.0) { - // Flipping all bits for negative values. - return static_cast(~tmp) << std::max(s, 0); - } else { - // Flipping only first bit. - return (static_cast(tmp) << std::max(s, 0)) ^ mask; - } - } - - template - U TupleRowZOrderComparator::get_shared_string_representation(const char *char_ptr, - int length) const { - int len = length < sizeof(U) ? length : sizeof(U); - if (len == 0) return 0; - U dst = 0; - // We copy the bytes from the string but swap the bytes because of integer endianness. - BitUtil::ByteSwapScalar(&dst, char_ptr, len); - return dst << ((sizeof(U) - len) * 8); - } - - int TupleRowZOrderComparator::operator()(const char* lhs, const char* rhs) const { - int result = compare(lhs, rhs); - return result; - } - - int TupleRowZOrderComparator::get_type_byte_size(FieldType type) const { - switch (type) { - case FieldType::OLAP_FIELD_TYPE_OBJECT: - case FieldType::OLAP_FIELD_TYPE_HLL: - case FieldType::OLAP_FIELD_TYPE_STRUCT: - case FieldType::OLAP_FIELD_TYPE_ARRAY: - case FieldType::OLAP_FIELD_TYPE_MAP: - case FieldType::OLAP_FIELD_TYPE_CHAR: - case FieldType::OLAP_FIELD_TYPE_VARCHAR: - return 0; - case FieldType::OLAP_FIELD_TYPE_NONE: - case FieldType::OLAP_FIELD_TYPE_BOOL: - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: - case FieldType::OLAP_FIELD_TYPE_TINYINT: - return 1; - case FieldType::OLAP_FIELD_TYPE_SMALLINT: - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: - return 2; - case FieldType::OLAP_FIELD_TYPE_FLOAT: - case FieldType::OLAP_FIELD_TYPE_INT: - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: - return 4; - case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: - case FieldType::OLAP_FIELD_TYPE_DOUBLE: - case FieldType::OLAP_FIELD_TYPE_BIGINT: - case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: - return 8; - case FieldType::OLAP_FIELD_TYPE_DECIMAL: - case FieldType::OLAP_FIELD_TYPE_LARGEINT: - case FieldType::OLAP_FIELD_TYPE_DATETIME: - case FieldType::OLAP_FIELD_TYPE_DATE: - return 16; - case FieldType::OLAP_FIELD_TYPE_UNKNOWN: - DCHECK(false); - break; - default: - DCHECK(false); - } - return -1; - } +RowComparator::RowComparator(Schema* schema) {} +int RowComparator::operator()(const char* left, const char* right) const { + return -1; } + +TupleRowZOrderComparator::TupleRowZOrderComparator() { + _schema = nullptr; + _sort_col_num = 0; +} + +TupleRowZOrderComparator::TupleRowZOrderComparator(int sort_col_num) { + _schema = nullptr; + _sort_col_num = sort_col_num; +} + +TupleRowZOrderComparator::TupleRowZOrderComparator(Schema* schema, int sort_col_num) + : _schema(schema), _sort_col_num(sort_col_num) { + _max_col_size = get_type_byte_size(_schema->column(0)->type()); + for (size_t i = 1; i < _sort_col_num; ++i) { + if (_max_col_size < get_type_byte_size(_schema->column(i)->type())) { + _max_col_size = get_type_byte_size(_schema->column(i)->type()); + } + } +} + +int TupleRowZOrderComparator::compare(const char* lhs, const char* rhs) const { + ContiguousRow lhs_row(_schema, lhs); + ContiguousRow rhs_row(_schema, rhs); + if (_max_col_size <= 4) { + return compare_based_on_size(lhs_row, rhs_row); + } else if (_max_col_size <= 8) { + return compare_based_on_size(lhs_row, rhs_row); + } else { + return compare_based_on_size(lhs_row, rhs_row); + } +} + +void TupleRowZOrderComparator::max_col_size(const RowCursor& rc) { + _max_col_size = get_type_byte_size(rc.schema()->column(0)->type()); + for (size_t i = 1; i < _sort_col_num; ++i) { + if (_max_col_size < get_type_byte_size(rc.schema()->column(i)->type())) { + _max_col_size = get_type_byte_size(rc.schema()->column(i)->type()); + } + } +} + +int TupleRowZOrderComparator::compare_row(const RowCursor& lhs, const RowCursor& rhs) { + max_col_size(lhs); + if (_max_col_size <= 4) { + return compare_based_on_size(lhs, rhs); + } else if (_max_col_size <= 8) { + return compare_based_on_size(lhs, lhs); + } else { + return compare_based_on_size(lhs, lhs); + } +} + +template +int TupleRowZOrderComparator::compare_based_on_size(LhsRowType& lhs, LhsRowType& rhs) const { + auto less_msb = [](U x, U y) { return x < y && x < (x ^ y); }; + FieldType type = lhs.schema()->column(0)->type(); + U msd_lhs = get_shared_representation( + lhs.cell(0).is_null() ? nullptr : lhs.cell(0).cell_ptr(), type); + U msd_rhs = get_shared_representation( + rhs.cell(0).is_null() ? nullptr : rhs.cell(0).cell_ptr(), type); + for (int i = 1; i < _sort_col_num; ++i) { + type = lhs.schema()->column(i)->type(); + const void* lhs_v = lhs.cell(i).is_null() ? nullptr : lhs.cell(i).cell_ptr(); + const void* rhs_v = rhs.cell(i).is_null() ? nullptr : rhs.cell(i).cell_ptr(); + U lhsi = get_shared_representation(lhs_v, type); + U rhsi = get_shared_representation(rhs_v, type); + if (less_msb(msd_lhs ^ msd_rhs, lhsi ^ rhsi)) { + msd_lhs = lhsi; + msd_rhs = rhsi; + } + } + return msd_lhs < msd_rhs ? -1 : (msd_lhs > msd_rhs ? 1 : 0); +} + +template +U TupleRowZOrderComparator::get_shared_representation(const void* val, FieldType type) const { + // The mask used for setting the sign bit correctly. + if (val == NULL) return 0; + constexpr U mask = (U)1 << (sizeof(U) * 8 - 1); + switch (type) { + case FieldType::OLAP_FIELD_TYPE_NONE: + return 0; + case FieldType::OLAP_FIELD_TYPE_BOOL: + return static_cast(*reinterpret_cast(val)) << (sizeof(U) * 8 - 1); + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + case FieldType::OLAP_FIELD_TYPE_TINYINT: + return get_shared_int_representation(*reinterpret_cast(val), + mask); + case FieldType::OLAP_FIELD_TYPE_SMALLINT: + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + return get_shared_int_representation(*reinterpret_cast(val), + mask); + case FieldType::OLAP_FIELD_TYPE_INT: + return get_shared_int_representation(*reinterpret_cast(val), + mask); + case FieldType::OLAP_FIELD_TYPE_DATETIME: + case FieldType::OLAP_FIELD_TYPE_DATE: + case FieldType::OLAP_FIELD_TYPE_BIGINT: + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + return get_shared_int_representation(*reinterpret_cast(val), + mask); + case FieldType::OLAP_FIELD_TYPE_LARGEINT: + return static_cast(*reinterpret_cast(val)) ^ mask; + case FieldType::OLAP_FIELD_TYPE_FLOAT: + return get_shared_float_representation(val, mask); + case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: + case FieldType::OLAP_FIELD_TYPE_DOUBLE: + return get_shared_float_representation(val, mask); + case FieldType::OLAP_FIELD_TYPE_CHAR: + case FieldType::OLAP_FIELD_TYPE_VARCHAR: { + const StringValue* string_value = reinterpret_cast(val); + return get_shared_string_representation(string_value->ptr, string_value->len); + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL: { + decimal12_t decimal_val = *reinterpret_cast(val); + int128_t value = decimal_val.integer * DecimalV2Value::ONE_BILLION + decimal_val.fraction; + return static_cast(value) ^ mask; + } + default: + return 0; + } +} + +template +U TupleRowZOrderComparator::get_shared_int_representation(const T val, U mask) const { + uint64_t shift_size = static_cast( + std::max(static_cast((sizeof(U) - sizeof(T)) * 8), (int64_t)0)); + return (static_cast(val) << shift_size) ^ mask; +} + +template +U TupleRowZOrderComparator::get_shared_float_representation(const void* val, U mask) const { + int64_t tmp; + T floating_value = *reinterpret_cast(val); + memcpy(&tmp, &floating_value, sizeof(T)); + if (UNLIKELY(std::isnan(floating_value))) return 0; + // "int" is enough because U and T are only primitive type + int s = (int)((sizeof(U) - sizeof(T)) * 8); + if (floating_value < 0.0) { + // Flipping all bits for negative values. + return static_cast(~tmp) << std::max(s, 0); + } else { + // Flipping only first bit. + return (static_cast(tmp) << std::max(s, 0)) ^ mask; + } +} + +template +U TupleRowZOrderComparator::get_shared_string_representation(const char* char_ptr, + int length) const { + int len = length < sizeof(U) ? length : sizeof(U); + if (len == 0) return 0; + U dst = 0; + // We copy the bytes from the string but swap the bytes because of integer endianness. + BitUtil::ByteSwapScalar(&dst, char_ptr, len); + return dst << ((sizeof(U) - len) * 8); +} + +int TupleRowZOrderComparator::operator()(const char* lhs, const char* rhs) const { + int result = compare(lhs, rhs); + return result; +} + +int TupleRowZOrderComparator::get_type_byte_size(FieldType type) const { + switch (type) { + case FieldType::OLAP_FIELD_TYPE_OBJECT: + case FieldType::OLAP_FIELD_TYPE_HLL: + case FieldType::OLAP_FIELD_TYPE_STRUCT: + case FieldType::OLAP_FIELD_TYPE_ARRAY: + case FieldType::OLAP_FIELD_TYPE_MAP: + case FieldType::OLAP_FIELD_TYPE_CHAR: + case FieldType::OLAP_FIELD_TYPE_VARCHAR: + return 0; + case FieldType::OLAP_FIELD_TYPE_NONE: + case FieldType::OLAP_FIELD_TYPE_BOOL: + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + case FieldType::OLAP_FIELD_TYPE_TINYINT: + return 1; + case FieldType::OLAP_FIELD_TYPE_SMALLINT: + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + return 2; + case FieldType::OLAP_FIELD_TYPE_FLOAT: + case FieldType::OLAP_FIELD_TYPE_INT: + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: + return 4; + case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: + case FieldType::OLAP_FIELD_TYPE_DOUBLE: + case FieldType::OLAP_FIELD_TYPE_BIGINT: + case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + return 8; + case FieldType::OLAP_FIELD_TYPE_DECIMAL: + case FieldType::OLAP_FIELD_TYPE_LARGEINT: + case FieldType::OLAP_FIELD_TYPE_DATETIME: + case FieldType::OLAP_FIELD_TYPE_DATE: + return 16; + case FieldType::OLAP_FIELD_TYPE_UNKNOWN: + DCHECK(false); + break; + default: + DCHECK(false); + } + return -1; +} + +} // namespace doris diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h index e2d4b8e3f9..113cd992b9 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h @@ -36,8 +36,8 @@ struct PercentileApproxState { void init(double compression = 10000) { if (!init_flag) { //https://doris.apache.org/zh-CN/sql-reference/sql-functions/aggregate-functions/percentile_approx.html#description - //The compression parameter setting range is [2048, 10000]. - //If the value of compression parameter is not specified set, or is outside the range of [2048, 10000], + //The compression parameter setting range is [2048, 10000]. + //If the value of compression parameter is not specified set, or is outside the range of [2048, 10000], //will use the default value of 10000 if (compression < 2048 || compression > 10000) { compression = 10000; diff --git a/be/src/vec/aggregate_functions/aggregate_function_reader.cpp b/be/src/vec/aggregate_functions/aggregate_function_reader.cpp index f90515fd5e..6b49f7390e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_reader.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_reader.cpp @@ -22,8 +22,7 @@ namespace doris::vectorized { // auto spread at nullable condition, null value do not participate aggregate void register_aggregate_function_reader_load(AggregateFunctionSimpleFactory& factory) { // add a suffix to the function name here to distinguish special functions of agg reader - auto register_function = [&](const std::string& name, - const AggregateFunctionCreator& creator) { + auto register_function = [&](const std::string& name, const AggregateFunctionCreator& creator) { factory.register_function(name + AGG_READER_SUFFIX, creator, false); factory.register_function(name + AGG_LOAD_SUFFIX, creator, false); }; @@ -45,13 +44,18 @@ void register_aggregate_function_replace_reader_load(AggregateFunctionSimpleFact factory.register_function(name + suffix, creator, nullable); }; - register_function("replace", AGG_READER_SUFFIX, create_aggregate_function_first, false); - register_function("replace", AGG_READER_SUFFIX, create_aggregate_function_first, true); - register_function("replace", AGG_LOAD_SUFFIX, create_aggregate_function_last, false); + register_function("replace", AGG_READER_SUFFIX, create_aggregate_function_first, + false); + register_function("replace", AGG_READER_SUFFIX, create_aggregate_function_first, + true); + register_function("replace", AGG_LOAD_SUFFIX, create_aggregate_function_last, + false); register_function("replace", AGG_LOAD_SUFFIX, create_aggregate_function_last, true); - register_function("replace_if_not_null", AGG_READER_SUFFIX, create_aggregate_function_first, false); - register_function("replace_if_not_null", AGG_LOAD_SUFFIX, create_aggregate_function_last, false); + register_function("replace_if_not_null", AGG_READER_SUFFIX, + create_aggregate_function_first, false); + register_function("replace_if_not_null", AGG_LOAD_SUFFIX, + create_aggregate_function_last, false); } } // namespace doris::vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp index 4931958276..0afa9b97d3 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp @@ -58,7 +58,8 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { register_aggregate_function_uniq(instance); register_aggregate_function_bitmap(instance); register_aggregate_function_combinator_distinct(instance); - register_aggregate_function_reader_load(instance); // register aggregate function for agg reader + register_aggregate_function_reader_load( + instance); // register aggregate function for agg reader register_aggregate_function_window_rank(instance); register_aggregate_function_stddev_variance_pop(instance); register_aggregate_function_topn(instance); diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index c438cd3582..8952ef2b44 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -451,8 +451,8 @@ AggregateFunctionPtr create_aggregate_function_first(const std::string& name, const Array& parameters, bool result_is_nullable) { return AggregateFunctionPtr( - create_function_single_value( - name, argument_types, parameters)); + create_function_single_value(name, argument_types, parameters)); } template @@ -461,8 +461,8 @@ AggregateFunctionPtr create_aggregate_function_last(const std::string& name, const Array& parameters, bool result_is_nullable) { return AggregateFunctionPtr( - create_function_single_value( - name, argument_types, parameters)); + create_function_single_value(name, argument_types, parameters)); } } // namespace doris::vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index 8f42a3398c..7b62ea2ad6 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -126,7 +126,7 @@ struct WindowFunnelState { sorted = true; } - void write(BufferWritable &out) const { + void write(BufferWritable& out) const { write_var_int(max_event_level, out); write_var_int(window, out); write_var_int(events.size(), out); @@ -161,31 +161,30 @@ struct WindowFunnelState { }; class AggregateFunctionWindowFunnel - : public IAggregateFunctionDataHelper { + : public IAggregateFunctionDataHelper { public: AggregateFunctionWindowFunnel(const DataTypes& argument_types_) - : IAggregateFunctionDataHelper(argument_types_, {}) { - } + : IAggregateFunctionDataHelper( + argument_types_, {}) {} String get_name() const override { return "window_funnel"; } - DataTypePtr get_return_type() const override { - return std::make_shared(); - } + DataTypePtr get_return_type() const override { return std::make_shared(); } void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num, Arena*) const override { - const auto& window = static_cast&>(*columns[0]).get_data()[row_num]; + const auto& window = + static_cast&>(*columns[0]).get_data()[row_num]; // TODO: handle mode in the future. // be/src/olap/row_block2.cpp copy_data_to_column - const auto& timestamp = static_cast&>(*columns[2]).get_data()[row_num]; + const auto& timestamp = + static_cast&>(*columns[2]).get_data()[row_num]; const int NON_EVENT_NUM = 3; for (int i = NON_EVENT_NUM; i < get_argument_types().size(); i++) { - const auto& is_set = static_cast&>(*columns[i]).get_data()[row_num]; + const auto& is_set = + static_cast&>(*columns[i]).get_data()[row_num]; if (is_set) { this->data(place).add(timestamp, i - NON_EVENT_NUM, get_argument_types().size() - NON_EVENT_NUM, window); diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 610babd91a..1dd74261d8 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -168,13 +168,14 @@ public: virtual void insert_many_from(const IColumn& src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) insert_from(src, position); } - + /// Appends a batch elements from other column with the same type /// indices_begin + indices_end represent the row indices of column src /// Warning: /// if *indices == -1 means the row is null, only use in outer join, do not use in any other place /// insert JOIN_NULL_HINT in null map to hint the null is produced by outer join - virtual void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) = 0; + virtual void insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) = 0; /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented). /// Is used to optimize some computations (in aggregation, for example). @@ -183,17 +184,19 @@ public: virtual void insert_data(const char* pos, size_t length) = 0; virtual void insert_many_fix_len_data(const char* pos, size_t num) { - LOG(FATAL) << "Method insert_many_fix_len_data is not supported for " << get_name(); + LOG(FATAL) << "Method insert_many_fix_len_data is not supported for " << get_name(); } // todo(zeno) Use dict_args temp object to cover all arguments virtual void insert_many_dict_data(const int32_t* data_array, size_t start_index, - const StringRef* dict, size_t data_num, uint32_t dict_num = 0) { - LOG(FATAL) << "Method insert_many_dict_data is not supported for " << get_name(); + const StringRef* dict, size_t data_num, + uint32_t dict_num = 0) { + LOG(FATAL) << "Method insert_many_dict_data is not supported for " << get_name(); } - - virtual void insert_many_binary_data(char* data_array, uint32_t* len_array, uint32_t* start_offset_array, size_t num) { - LOG(FATAL) << "Method insert_many_binary_data is not supported for " << get_name(); + + virtual void insert_many_binary_data(char* data_array, uint32_t* len_array, + uint32_t* start_offset_array, size_t num) { + LOG(FATAL) << "Method insert_many_binary_data is not supported for " << get_name(); } /// Appends "default value". diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index a7a87a44bd..35bfa45b52 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -62,7 +62,8 @@ public: data.push_back(*reinterpret_cast(pos)); } - void insert_many_binary_data(char* data_array, uint32_t* len_array, uint32_t* start_offset_array, size_t num) override { + void insert_many_binary_data(char* data_array, uint32_t* len_array, + uint32_t* start_offset_array, size_t num) override { if constexpr (std::is_same_v) { for (size_t i = 0; i < num; i++) { uint32_t len = len_array[i]; @@ -74,7 +75,7 @@ public: value.deserialize(data_array + start_offset); *pvalue = std::move(value); } else { - *pvalue = std::move(*reinterpret_cast(data_array + start_offset)); + *pvalue = std::move(*reinterpret_cast(data_array + start_offset)); } } } else if constexpr (std::is_same_v) { @@ -175,7 +176,7 @@ public: for (int i = 0; i < new_size; ++i) { auto offset = *(indices_begin + i); if (offset == -1) { - data.emplace_back(T{}); + data.emplace_back(T {}); } else { data.emplace_back(src_vec.get_element(offset)); } @@ -367,11 +368,11 @@ template struct is_complex : std::false_type {}; template <> -struct is_complex : std::true_type {}; +struct is_complex : std::true_type {}; //DataTypeBitMap::FieldType = BitmapValue template <> -struct is_complex : std::true_type {}; +struct is_complex : std::true_type {}; //DataTypeHLL::FieldType = HyperLogLog template diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 16be16692d..b557b68f86 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -84,7 +84,8 @@ public: s += length; } - void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override { + void insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) override { s += (indices_end - indices_begin); } diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 2cf5cc6a55..d7112551d3 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -222,7 +222,8 @@ ColumnPtr ColumnDecimal::replicate(const IColumn::Offsets& offsets) const { } template -void ColumnDecimal::replicate(const uint32_t* counts, size_t target_size, IColumn& column) const { +void ColumnDecimal::replicate(const uint32_t* counts, size_t target_size, + IColumn& column) const { size_t size = data.size(); if (0 == size) return; diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 0b9c96a072..311dadb75d 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -107,7 +107,7 @@ public: for (int i = 0; i < new_size; ++i) { auto offset = *(indices_begin + i); - data[origin_size + i] = offset == -1 ? T{} : src_vec.get_element(offset); + data[origin_size + i] = offset == -1 ? T {} : src_vec.get_element(offset); } } diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 50f764a447..23aa4818b0 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -55,6 +55,7 @@ namespace doris::vectorized { template class ColumnDictionary final : public COWHelper> { static_assert(IsNumber); + private: friend class COWHelper; @@ -253,13 +254,9 @@ public: return _dict.find_code_by_bound(value, greater, eq); } - void generate_hash_values() { - _dict.generate_hash_values(); - } + void generate_hash_values() { _dict.generate_hash_values(); } - uint32_t get_hash_value(uint32_t idx) const { - return _dict.get_hash_value(_codes[idx]); - } + uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); } phmap::flat_hash_set find_codes( const phmap::flat_hash_set& values) const { @@ -308,7 +305,7 @@ public: } inline StringValue& get_value(T code) { return _dict_data[code]; } - + inline void generate_hash_values() { if (_hash_values.size() == 0) { _hash_values.resize(_dict_data.size()); @@ -320,9 +317,7 @@ public: } } - inline uint32_t get_hash_value(T code) const { - return _hash_values[code]; - } + inline uint32_t get_hash_value(T code) const { return _hash_values[code]; } // For > , code takes upper_bound - 1; For >= , code takes upper_bound // For < , code takes upper_bound; For <=, code takes upper_bound - 1 @@ -370,9 +365,7 @@ public: _hash_values.clear(); } - void clear_hash_values() { - _hash_values.clear(); - } + void clear_hash_values() { _hash_values.clear(); } void sort() { size_t dict_size = _dict_data.size(); @@ -397,7 +390,7 @@ public: phmap::flat_hash_map _code_convert_map; // hash value of origin string , used for bloom filter // It's a trade-off of space for performance - // But in TPC-DS 1GB q60,we see no significant improvement. + // But in TPC-DS 1GB q60,we see no significant improvement. // This may because the magnitude of the data is not large enough(in q60, only about 80k rows data is filtered for largest table) // So we may need more test here. HashValueContainer _hash_values; diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h index cac3a063cf..4937e1391b 100644 --- a/be/src/vec/columns/column_dummy.h +++ b/be/src/vec/columns/column_dummy.h @@ -80,7 +80,8 @@ public: s += length; } - void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override { + void insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) override { s += (indices_end - indices_begin); } diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 69634effb4..f7cf9e9d3e 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -112,10 +112,10 @@ StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena, return StringRef(nested_ref.data - s, nested_ref.size + s); } - void ColumnNullable::insert_join_null_data() { - get_nested_column().insert_default(); - get_null_map_data().push_back(JOIN_NULL_HINT); - } +void ColumnNullable::insert_join_null_data() { + get_nested_column().insert_default(); + get_null_map_data().push_back(JOIN_NULL_HINT); +} const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) { UInt8 val = *reinterpret_cast(pos); @@ -137,10 +137,13 @@ void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t get_nested_column().insert_range_from(*nullable_col.nested_column, start, length); } -void ColumnNullable::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) { +void ColumnNullable::insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) { const ColumnNullable& src_concrete = assert_cast(src); - get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin, indices_end); - get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin, indices_end); + get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin, + indices_end); + get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin, + indices_end); } void ColumnNullable::insert(const Field& x) { @@ -192,8 +195,10 @@ Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, const ColumnNullable* nullable_col_ptr = reinterpret_cast(col_ptr); ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column; ColumnPtr null_map_ptr = nullable_col_ptr->null_map; - RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, const_cast(nest_col_ptr.get()))); - RETURN_IF_ERROR(get_null_map_column().filter_by_selector(sel, sel_size, const_cast(null_map_ptr.get()))); + RETURN_IF_ERROR(get_nested_column().filter_by_selector( + sel, sel_size, const_cast(nest_col_ptr.get()))); + RETURN_IF_ERROR(get_null_map_column().filter_by_selector( + sel, sel_size, const_cast(null_map_ptr.get()))); return Status::OK(); } diff --git a/be/src/vec/columns/column_set.h b/be/src/vec/columns/column_set.h index afd1ae44b0..f9f7fe8365 100644 --- a/be/src/vec/columns/column_set.h +++ b/be/src/vec/columns/column_set.h @@ -32,15 +32,14 @@ using ConstSetPtr = std::shared_ptr; * Behaves like a constant-column (because the set is one, not its own for each line). * This column has a nonstandard value, so it can not be obtained via a normal interface. */ -class ColumnSet final : public COWHelper -{ +class ColumnSet final : public COWHelper { public: friend class COWHelper; - ColumnSet(size_t s_, const ConstSetPtr & data_) : data(data_) { s = s_; } - ColumnSet(const ColumnSet &) = default; + ColumnSet(size_t s_, const ConstSetPtr& data_) : data(data_) { s = s_; } + ColumnSet(const ColumnSet&) = default; - const char * get_family_name() const override { return "Set"; } + const char* get_family_name() const override { return "Set"; } MutableColumnPtr clone_dummy(size_t s_) const override { return ColumnSet::create(s_, data); } ConstSetPtr get_data() const { return data; } @@ -49,4 +48,4 @@ private: ConstSetPtr data; }; -} \ No newline at end of file +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 98ebf673b4..f2242615cf 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -93,7 +93,8 @@ void ColumnString::insert_range_from(const IColumn& src, size_t start, size_t le } } -void ColumnString::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) { +void ColumnString::insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) { for (auto x = indices_begin; x != indices_end; ++x) { if (*x == -1) { ColumnString::insert_default(); diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index dfe1bce3b3..2d679432ed 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -219,7 +219,8 @@ void ColumnVector::insert_range_from(const IColumn& src, size_t start, size_t } template -void ColumnVector::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) { +void ColumnVector::insert_indices_from(const IColumn& src, const int* indices_begin, + const int* indices_end) { const Self& src_vec = assert_cast(src); auto origin_size = size(); auto new_size = indices_end - indices_begin; @@ -232,9 +233,10 @@ void ColumnVector::insert_indices_from(const IColumn& src, const int* indices // 1. nullable column : offset == -1 means is null at the here, set true here // 2. real data column : offset == -1 what at is meaningless // 3. JOIN_NULL_HINT only use in outer join to hint the null is produced by outer join - data[origin_size + i] = (offset == -1) ? T{JOIN_NULL_HINT} : src_vec.get_element(offset); + data[origin_size + i] = + (offset == -1) ? T {JOIN_NULL_HINT} : src_vec.get_element(offset); } else { - data[origin_size + i] = (offset == -1) ? T{0} : src_vec.get_element(offset); + data[origin_size + i] = (offset == -1) ? T {0} : src_vec.get_element(offset); } } } diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h index 0c8cfc6490..b5b158e1f7 100644 --- a/be/src/vec/common/allocator.h +++ b/be/src/vec/common/allocator.h @@ -101,9 +101,7 @@ template class Allocator { public: /// Allocate memory range. - void* alloc(size_t size, size_t alignment = 0) { - return alloc_no_track(size, alignment); - } + void* alloc(size_t size, size_t alignment = 0) { return alloc_no_track(size, alignment); } /// Free memory range. void free(void* buf, size_t size) { @@ -143,7 +141,7 @@ public: // On apple and freebsd self-implemented mremap used (common/mremap.h) buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (MAP_FAILED == buf){ + if (MAP_FAILED == buf) { RELEASE_THREAD_LOCAL_MEM_TRACKER(new_size - old_size); doris::vectorized::throwFromErrno("Allocator: Cannot mremap memory chunk from " + std::to_string(old_size) + " to " + diff --git a/be/src/vec/common/arena.h b/be/src/vec/common/arena.h index c46d790a82..8789f5ac35 100644 --- a/be/src/vec/common/arena.h +++ b/be/src/vec/common/arena.h @@ -34,7 +34,6 @@ #include "gutil/dynamic_annotations.h" - namespace doris::vectorized { /** Memory pool to append something. For example, short strings. @@ -60,7 +59,6 @@ private: Chunk* prev; Chunk(size_t size_, Chunk* prev_) { - begin = reinterpret_cast(Allocator::alloc(size_)); pos = begin; end = begin + size_ - pad_right; @@ -251,7 +249,8 @@ public: return res; } - char* aligned_realloc(const char* old_data, size_t old_size, size_t new_size, size_t alignment) { + char* aligned_realloc(const char* old_data, size_t old_size, size_t new_size, + size_t alignment) { char* res = aligned_alloc(new_size, alignment); if (old_data) { memcpy(res, old_data, old_size); diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h index 75e60b7e4f..afb6a685b4 100644 --- a/be/src/vec/common/columns_hashing.h +++ b/be/src/vec/common/columns_hashing.h @@ -190,7 +190,7 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod { using EmplaceResult = columns_hashing_impl::EmplaceResultImpl; using FindResult = columns_hashing_impl::FindResultImpl; - static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings) { + static HashMethodContextPtr createContext(const HashMethodContext::Settings& settings) { return nullptr; } @@ -203,19 +203,20 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod { return {nested_col}; } - HashMethodSingleLowNullableColumn( - const ColumnRawPtrs & key_columns_nullable, const Sizes & key_sizes, const HashMethodContextPtr & context) - : Base(get_nested_column(key_columns_nullable[0]), key_sizes, context), key_columns(key_columns_nullable) { - } + HashMethodSingleLowNullableColumn(const ColumnRawPtrs& key_columns_nullable, + const Sizes& key_sizes, const HashMethodContextPtr& context) + : Base(get_nested_column(key_columns_nullable[0]), key_sizes, context), + key_columns(key_columns_nullable) {} template - ALWAYS_INLINE EmplaceResult emplace_key(Data & data, size_t row, Arena & pool) { + ALWAYS_INLINE EmplaceResult emplace_key(Data& data, size_t row, Arena& pool) { if (key_columns[0]->is_null_at(row)) { bool has_null_key = data.has_null_key_data(); data.has_null_key_data() = true; if constexpr (has_mapped) - return EmplaceResult(data.get_null_key_data(), data.get_null_key_data(), !has_null_key); + return EmplaceResult(data.get_null_key_data(), data.get_null_key_data(), + !has_null_key); else return EmplaceResult(!has_null_key); } @@ -227,13 +228,12 @@ struct HashMethodSingleLowNullableColumn : public SingleColumnMethod { data.emplace(key_holder, it, inserted); if constexpr (has_mapped) { - auto & mapped = *lookup_result_get_mapped(it); + auto& mapped = *lookup_result_get_mapped(it); if (inserted) { new (&mapped) Mapped(); } return EmplaceResult(mapped, mapped, inserted); - } - else + } else return EmplaceResult(inserted); } }; diff --git a/be/src/vec/common/hash_table/fixed_hash_map.h b/be/src/vec/common/hash_table/fixed_hash_map.h index 075ada2d3b..1985885dda 100644 --- a/be/src/vec/common/hash_table/fixed_hash_map.h +++ b/be/src/vec/common/hash_table/fixed_hash_map.h @@ -35,36 +35,36 @@ struct FixedHashMapCell { Mapped mapped; FixedHashMapCell() {} - FixedHashMapCell(const Key &, const State &) : full(true) {} - FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {} + FixedHashMapCell(const Key&, const State&) : full(true) {} + FixedHashMapCell(const value_type& value_, const State&) : full(true), mapped(value_.second) {} const VoidKey get_key() const { return {}; } - Mapped & get_mapped() { return mapped; } - const Mapped & get_mapped() const { return mapped; } + Mapped& get_mapped() { return mapped; } + const Mapped& get_mapped() const { return mapped; } - bool is_zero(const State &) const { return !full; } + bool is_zero(const State&) const { return !full; } void set_zero() { full = false; } /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field. /// Note that we have to assemble a continuous layout for the value_type on each call of getValue(). struct CellExt { CellExt() {} - CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast(ptr_)) {} - void update(Key && key_, const FixedHashMapCell * ptr_) { + CellExt(Key&& key_, const FixedHashMapCell* ptr_) + : key(key_), ptr(const_cast(ptr_)) {} + void update(Key&& key_, const FixedHashMapCell* ptr_) { key = key_; - ptr = const_cast(ptr_); + ptr = const_cast(ptr_); } Key key; - FixedHashMapCell * ptr; + FixedHashMapCell* ptr; - const Key & get_key() const { return key; } - Mapped & get_mapped() { return ptr->mapped; } - const Mapped & get_mapped() const { return ptr->mapped; } + const Key& get_key() const { return key; } + Mapped& get_mapped() { return ptr->mapped; } + const Mapped& get_mapped() const { return ptr->mapped; } const value_type get_value() const { return {key, ptr->mapped}; } }; }; - /// In case when we can encode empty cells with zero mapped values. template struct FixedHashMapImplicitZeroCell { @@ -77,46 +77,44 @@ struct FixedHashMapImplicitZeroCell { Mapped mapped; FixedHashMapImplicitZeroCell() {} - FixedHashMapImplicitZeroCell(const Key &, const State &) {} - FixedHashMapImplicitZeroCell(const value_type & value_, const State &) : mapped(value_.second) {} + FixedHashMapImplicitZeroCell(const Key&, const State&) {} + FixedHashMapImplicitZeroCell(const value_type& value_, const State&) : mapped(value_.second) {} const VoidKey get_first() const { return {}; } - Mapped & get_second() { return mapped; } - const Mapped & get_second() const { return mapped; } + Mapped& get_second() { return mapped; } + const Mapped& get_second() const { return mapped; } - bool is_zero(const State &) const { return !mapped; } + bool is_zero(const State&) const { return !mapped; } void set_zero() { mapped = {}; } /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field. /// Note that we have to assemble a continuous layout for the value_type on each call of getValue(). struct CellExt { CellExt() {} - CellExt(Key && key_, const FixedHashMapImplicitZeroCell * ptr_) : key(key_), ptr(const_cast(ptr_)) {} - void update(Key && key_, const FixedHashMapImplicitZeroCell * ptr_) { + CellExt(Key&& key_, const FixedHashMapImplicitZeroCell* ptr_) + : key(key_), ptr(const_cast(ptr_)) {} + void update(Key&& key_, const FixedHashMapImplicitZeroCell* ptr_) { key = key_; - ptr = const_cast(ptr_); + ptr = const_cast(ptr_); } Key key; - FixedHashMapImplicitZeroCell * ptr; + FixedHashMapImplicitZeroCell* ptr; - const Key & get_first() const { return key; } - Mapped & get_second() { return ptr->mapped; } - const Mapped & get_second() const { return ptr->mapped; } + const Key& get_first() const { return key; } + Mapped& get_second() { return ptr->mapped; } + const Mapped& get_second() const { return ptr->mapped; } const value_type get_value() const { return {key, ptr->mapped}; } }; }; template -ALWAYS_INLINE inline auto lookup_result_get_mapped(FixedHashMapImplicitZeroCell* cell) { +ALWAYS_INLINE inline auto lookup_result_get_mapped( + FixedHashMapImplicitZeroCell* cell) { return &cell->get_second(); } -template < - typename Key, - typename Mapped, - typename Cell = FixedHashMapCell, - typename Size = FixedHashTableStoredSize, - typename Allocator = HashTableAllocator> +template , + typename Size = FixedHashTableStoredSize, typename Allocator = HashTableAllocator> class FixedHashMap : public FixedHashTable { public: using Base = FixedHashTable; @@ -126,7 +124,7 @@ public: using Base::Base; template - void ALWAYS_INLINE merge_to_via_emplace(Self & that, Func && func) { + void ALWAYS_INLINE merge_to_via_emplace(Self& that, Func&& func) { for (auto it = this->begin(), end = this->end(); it != end; ++it) { typename Self::LookupResult res_it; bool inserted; @@ -136,7 +134,7 @@ public: } template - void ALWAYS_INLINE merge_to_via_find(Self & that, Func && func) { + void ALWAYS_INLINE merge_to_via_find(Self& that, Func&& func) { for (auto it = this->begin(), end = this->end(); it != end; ++it) { auto res_it = that.find(it->get_key(), it.get_hash()); if (!res_it) @@ -147,48 +145,39 @@ public: } template - void for_each_value(Func && func) { - for (auto & v : *this) - func(v.get_key(), v.get_mapped()); + void for_each_value(Func&& func) { + for (auto& v : *this) func(v.get_key(), v.get_mapped()); } template - void for_each_mapped(Func && func) { - for (auto & v : *this) - func(v.get_second()); + void for_each_mapped(Func&& func) { + for (auto& v : *this) func(v.get_second()); } - Mapped & ALWAYS_INLINE operator[](const Key & x) { + Mapped& ALWAYS_INLINE operator[](const Key& x) { LookupResult it; bool inserted; this->emplace(x, it, inserted); - if (inserted) - new (&it->get_mapped()) Mapped(); + if (inserted) new (&it->get_mapped()) Mapped(); return it->get_mapped(); } // fixed hash map never overflow - bool add_elem_size_overflow(size_t add_size) const { - return false; - } + bool add_elem_size_overflow(size_t add_size) const { return false; } char* get_null_key_data() { return nullptr; } bool has_null_key_data() const { return false; } }; template -using FixedImplicitZeroHashMap = FixedHashMap< - Key, - Mapped, - FixedHashMapImplicitZeroCell, - FixedHashTableStoredSize>, - Allocator>; +using FixedImplicitZeroHashMap = + FixedHashMap, + FixedHashTableStoredSize>, + Allocator>; template -using FixedImplicitZeroHashMapWithCalculatedSize = FixedHashMap< - Key, - Mapped, - FixedHashMapImplicitZeroCell, - FixedHashTableCalculatedSize>, - Allocator>; \ No newline at end of file +using FixedImplicitZeroHashMapWithCalculatedSize = + FixedHashMap, + FixedHashTableCalculatedSize>, + Allocator>; \ No newline at end of file diff --git a/be/src/vec/common/hash_table/fixed_hash_table.h b/be/src/vec/common/hash_table/fixed_hash_table.h index fc03781a49..338ae03905 100644 --- a/be/src/vec/common/hash_table/fixed_hash_table.h +++ b/be/src/vec/common/hash_table/fixed_hash_table.h @@ -31,12 +31,12 @@ struct FixedHashTableCell { bool full; FixedHashTableCell() {} - FixedHashTableCell(const Key &, const State &) : full(true) {} + FixedHashTableCell(const Key&, const State&) : full(true) {} const VoidKey get_key() const { return {}; } VoidMapped get_mapped() const { return {}; } - bool is_zero(const State &) const { return !full; } + bool is_zero(const State&) const { return !full; } void set_zero() { full = false; } static constexpr bool need_zero_value_storage = false; @@ -51,20 +51,19 @@ struct FixedHashTableCell { const VoidKey get_key() const { return {}; } VoidMapped get_mapped() const { return {}; } - const value_type & get_value() const { return key; } - void update(Key && key_, FixedHashTableCell *) { key = key_; } + const value_type& get_value() const { return key; } + void update(Key&& key_, FixedHashTableCell*) { key = key_; } }; }; - /// How to obtain the size of the table. template struct FixedHashTableStoredSize { size_t m_size = 0; - size_t get_size(const Cell *, const typename Cell::State &, size_t) const { return m_size; } - bool is_empty(const Cell *, const typename Cell::State &, size_t) const { return m_size == 0; } + size_t get_size(const Cell*, const typename Cell::State&, size_t) const { return m_size; } + bool is_empty(const Cell*, const typename Cell::State&, size_t) const { return m_size == 0; } void increase_size() { ++m_size; } void clear_size() { m_size = 0; } @@ -73,18 +72,16 @@ struct FixedHashTableStoredSize { template struct FixedHashTableCalculatedSize { - size_t get_size(const Cell * buf, const typename Cell::State & state, size_t num_cells) const { + size_t get_size(const Cell* buf, const typename Cell::State& state, size_t num_cells) const { size_t res = 0; - for (const Cell * end = buf + num_cells; buf != end; ++buf) - if (!buf->is_zero(state)) - ++res; + for (const Cell* end = buf + num_cells; buf != end; ++buf) + if (!buf->is_zero(state)) ++res; return res; } - bool isEmpty(const Cell * buf, const typename Cell::State & state, size_t num_cells) const { - for (const Cell * end = buf + num_cells; buf != end; ++buf) - if (!buf->is_zero(state)) - return false; + bool isEmpty(const Cell* buf, const typename Cell::State& state, size_t num_cells) const { + for (const Cell* end = buf + num_cells; buf != end; ++buf) + if (!buf->is_zero(state)) return false; return true; } @@ -93,7 +90,6 @@ struct FixedHashTableCalculatedSize { void set_size(size_t) {} }; - /** Used as a lookup table for small keys such as UInt8, UInt16. It's different * than a HashTable in that keys are not stored in the Cell buf, but inferred * inside each iterator. There are a bunch of to make it faster than using @@ -111,7 +107,10 @@ struct FixedHashTableCalculatedSize { * TwoLevelHashSet(Map) to contain different type of sets(maps). */ template -class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size { +class FixedHashTable : private boost::noncopyable, + protected Allocator, + protected Cell::State, + protected Size { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); protected: @@ -120,9 +119,9 @@ protected: using Self = FixedHashTable; - Cell * buf; /// A piece of memory for all elements. + Cell* buf; /// A piece of memory for all elements. - void alloc() { buf = reinterpret_cast(Allocator::alloc(NUM_CELLS * sizeof(Cell))); } + void alloc() { buf = reinterpret_cast(Allocator::alloc(NUM_CELLS * sizeof(Cell))); } void free() { if (buf) { @@ -133,49 +132,44 @@ protected: void destroy_elements() { if (!std::is_trivially_destructible_v) - for (iterator it = begin(), it_end = end(); it != it_end; ++it) - it.ptr->~Cell(); + for (iterator it = begin(), it_end = end(); it != it_end; ++it) it.ptr->~Cell(); } - template class iterator_base { using Container = std::conditional_t; using cell_type = std::conditional_t; - Container * container; - cell_type * ptr; + Container* container; + cell_type* ptr; friend class FixedHashTable; public: iterator_base() {} - iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) { + iterator_base(Container* container_, cell_type* ptr_) : container(container_), ptr(ptr_) { cell.update(ptr - container->buf, ptr); } - bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; } - bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; } + bool operator==(const iterator_base& rhs) const { return ptr == rhs.ptr; } + bool operator!=(const iterator_base& rhs) const { return ptr != rhs.ptr; } - Derived & operator++() { + Derived& operator++() { ++ptr; /// Skip empty cells in the main buffer. auto buf_end = container->buf + container->NUM_CELLS; - while (ptr < buf_end && ptr->is_zero(*container)) - ++ptr; + while (ptr < buf_end && ptr->is_zero(*container)) ++ptr; - return static_cast(*this); + return static_cast(*this); } - auto & operator*() { - if (cell.key != ptr - container->buf) - cell.update(ptr - container->buf, ptr); + auto& operator*() { + if (cell.key != ptr - container->buf) cell.update(ptr - container->buf, ptr); return cell; } - auto * operator-> () { - if (cell.key != ptr - container->buf) - cell.update(ptr - container->buf, ptr); + auto* operator->() { + if (cell.key != ptr - container->buf) cell.update(ptr - container->buf, ptr); return &cell; } @@ -185,29 +179,27 @@ protected: typename cell_type::CellExt cell; }; - public: using key_type = Key; using mapped_type = typename Cell::mapped_type; using value_type = typename Cell::value_type; using cell_type = Cell; - using LookupResult = Cell *; - using ConstLookupResult = const Cell *; + using LookupResult = Cell*; + using ConstLookupResult = const Cell*; - - size_t hash(const Key & x) const { return x; } + size_t hash(const Key& x) const { return x; } FixedHashTable() { alloc(); } - FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); } + FixedHashTable(FixedHashTable&& rhs) : buf(nullptr) { *this = std::move(rhs); } ~FixedHashTable() { destroy_elements(); free(); } - FixedHashTable & operator=(FixedHashTable && rhs) { + FixedHashTable& operator=(FixedHashTable&& rhs) { destroy_elements(); free(); @@ -230,15 +222,12 @@ public: using iterator_base::iterator_base; }; - const_iterator begin() const { - if (!buf) - return end(); + if (!buf) return end(); - const Cell * ptr = buf; + const Cell* ptr = buf; auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->is_zero(*this)) - ++ptr; + while (ptr < buf_end && ptr->is_zero(*this)) ++ptr; return const_iterator(this, ptr); } @@ -246,13 +235,11 @@ public: const_iterator cbegin() const { return begin(); } iterator begin() { - if (!buf) - return end(); + if (!buf) return end(); - Cell * ptr = buf; + Cell* ptr = buf; auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->is_zero(*this)) - ++ptr; + while (ptr < buf_end && ptr->is_zero(*this)) ++ptr; return iterator(this, ptr); } @@ -262,22 +249,17 @@ public: return const_iterator(this, buf ? buf + NUM_CELLS : buf); } - const_iterator cend() const { - return end(); - } - - iterator end() { - return iterator(this, buf ? buf + NUM_CELLS : buf); - } + const_iterator cend() const { return end(); } + iterator end() { return iterator(this, buf ? buf + NUM_CELLS : buf); } public: /// The last parameter is unused but exists for compatibility with HashTable interface. - void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) { + void ALWAYS_INLINE emplace(const Key& x, LookupResult& it, bool& inserted, + size_t /* hash */ = 0) { it = &buf[x]; - if (!buf[x].is_zero(*this)) - { + if (!buf[x].is_zero(*this)) { inserted = false; return; } @@ -287,34 +269,40 @@ public: this->increase_size(); } - std::pair ALWAYS_INLINE insert(const value_type & x) { + std::pair ALWAYS_INLINE insert(const value_type& x) { std::pair res; emplace(Cell::get_key(x), res.first, res.second); - if (res.second) - insert_set_mapped(res.first->get_mapped(), x); + if (res.second) insert_set_mapped(res.first->get_mapped(), x); return res; } - LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].is_zero(*this) ? &buf[x] : nullptr; } - - ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast *>(this)->find(x); } - - LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].is_zero(*this) ? &buf[hash_value] : nullptr; } - - ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const { - return const_cast *>(this)->find(key, hash_value); + LookupResult ALWAYS_INLINE find(const Key& x) { + return !buf[x].is_zero(*this) ? &buf[x] : nullptr; } - bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].is_zero(*this); } - bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].is_zero(*this); } + ConstLookupResult ALWAYS_INLINE find(const Key& x) const { + return const_cast*>(this)->find(x); + } + + LookupResult ALWAYS_INLINE find(const Key&, size_t hash_value) { + return !buf[hash_value].is_zero(*this) ? &buf[hash_value] : nullptr; + } + + ConstLookupResult ALWAYS_INLINE find(const Key& key, size_t hash_value) const { + return const_cast*>(this)->find(key, hash_value); + } + + bool ALWAYS_INLINE has(const Key& x) const { return !buf[x].is_zero(*this); } + bool ALWAYS_INLINE has(const Key&, size_t hash_value) const { + return !buf[hash_value].is_zero(*this); + } void write(doris::vectorized::BufferWritable& wb) const { Cell::State::write(wb); doris::vectorized::write_var_uint(size(), wb); - if (!buf) - return; + if (!buf) return; for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) { if (!ptr->is_zero(*this)) { @@ -349,7 +337,7 @@ public: destroy_elements(); this->clear_size(); - memset(static_cast(buf), 0, NUM_CELLS * sizeof(*buf)); + memset(static_cast(buf), 0, NUM_CELLS * sizeof(*buf)); } /// After executing this function, the table can only be destroyed, @@ -369,13 +357,12 @@ public: /// because offset for zero value considered to be 0 /// and for other values it will be `offset in buffer + 1` size_t offset_internal(ConstLookupResult ptr) const { - if (ptr->is_zero(*this)) - return 0; + if (ptr->is_zero(*this)) return 0; return ptr - buf + 1; } - const Cell * data() const { return buf; } - Cell * data() { return buf; } + const Cell* data() const { return buf; } + Cell* data() { return buf; } #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS size_t get_collisions() const { return 0; } diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h index 3ab335d9be..2fa8ebfb72 100644 --- a/be/src/vec/common/hash_table/hash.h +++ b/be/src/vec/common/hash_table/hash.h @@ -115,8 +115,8 @@ inline size_t hash_crc32(doris::vectorized::UInt128 u) { template <> inline size_t hash_crc32(doris::vectorized::Int128 u) { - return doris::vectorized::UInt128HashCRC32()(doris::vectorized::UInt128( - (u >> 64) & int64_t (-1), u & int64_t(-1))); + return doris::vectorized::UInt128HashCRC32()( + doris::vectorized::UInt128((u >> 64) & int64_t(-1), u & int64_t(-1))); } #define DEFINE_HASH(T) \ diff --git a/be/src/vec/common/hash_table/hash_set.h b/be/src/vec/common/hash_table/hash_set.h index 23dd606549..337a41afb2 100644 --- a/be/src/vec/common/hash_table/hash_set.h +++ b/be/src/vec/common/hash_table/hash_set.h @@ -48,7 +48,6 @@ public: for (size_t i = 0; i < rhs.grower.buf_size(); ++i) if (!rhs.buf[i].is_zero(*this)) this->insert(Cell::get_key(rhs.buf[i].get_value())); } - }; template diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index f0a94b77f3..26cfc61d3d 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -385,7 +385,8 @@ protected: return place_value; } - std::pair ALWAYS_INLINE find_cell_opt(const Key& x, size_t hash_value, size_t place_value) const { + std::pair ALWAYS_INLINE find_cell_opt(const Key& x, size_t hash_value, + size_t place_value) const { bool is_zero = false; do { is_zero = buf[place_value].is_zero(*this); @@ -870,9 +871,7 @@ public: float get_factor() const { return MAX_BUCKET_OCCUPANCY_FRACTION; } - bool should_be_shrink(int64_t valid_row) { - return valid_row < get_factor() * (size() / 2.0); - } + bool should_be_shrink(int64_t valid_row) { return valid_row < get_factor() * (size() / 2.0); } void init_buf_size(size_t reserve_for_num_elements) { free(); @@ -881,8 +880,7 @@ public: } void delete_zero_key(Key key) { - if (Cell::is_zero(key, *this)) - this->clear_get_has_zero(); + if (Cell::is_zero(key, *this)) this->clear_get_has_zero(); } void clear() { destroy_elements(); diff --git a/be/src/vec/common/memcmp_small.h b/be/src/vec/common/memcmp_small.h index d21af0568b..d5f9dec949 100644 --- a/be/src/vec/common/memcmp_small.h +++ b/be/src/vec/common/memcmp_small.h @@ -49,7 +49,8 @@ inline int cmp(T a, T b) { /** Variant when memory regions may have different sizes. */ template -inline int memcmp_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, size_t b_size) { +inline int memcmp_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, + size_t b_size) { size_t min_size = std::min(a_size, b_size); for (size_t offset = 0; offset < min_size; offset += 16) { @@ -97,7 +98,7 @@ inline int memcmp_small_allow_overflow15(const Char* a, const Char* b, size_t si */ template inline bool memequal_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, - size_t b_size) { + size_t b_size) { if (a_size != b_size) return false; for (size_t offset = 0; offset < a_size; offset += 16) { @@ -183,7 +184,8 @@ inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) #include template -inline int memcmp_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, size_t b_size) { +inline int memcmp_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, + size_t b_size) { if (auto res = memcmp(a, b, std::min(a_size, b_size))) return res; else @@ -197,7 +199,7 @@ inline int memcmp_small_allow_overflow15(const Char* a, const Char* b, size_t si template inline bool memequal_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, - size_t b_size) { + size_t b_size) { return a_size == b_size && 0 == memcmp(a, b, a_size); } diff --git a/be/src/vec/common/memcpy_small.h b/be/src/vec/common/memcpy_small.h index 2dfddf5574..727070bd4e 100644 --- a/be/src/vec/common/memcpy_small.h +++ b/be/src/vec/common/memcpy_small.h @@ -48,7 +48,7 @@ namespace detail { inline void memcpy_small_allow_read_write_overflow15_impl(char* __restrict dst, - const char* __restrict src, ssize_t n) { + const char* __restrict src, ssize_t n) { while (n > 0) { _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), _mm_loadu_si128(reinterpret_cast(src))); @@ -63,10 +63,10 @@ inline void memcpy_small_allow_read_write_overflow15_impl(char* __restrict dst, /** Works under assumption, that it's possible to read up to 15 excessive bytes after end of 'src' region * and to write any garbage into up to 15 bytes after end of 'dst' region. */ -inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, const void* __restrict src, - size_t n) { +inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, + const void* __restrict src, size_t n) { detail::memcpy_small_allow_read_write_overflow15_impl(reinterpret_cast(dst), - reinterpret_cast(src), n); + reinterpret_cast(src), n); } /** NOTE There was also a function, that assumes, that you could read any bytes inside same memory page of src. @@ -75,8 +75,8 @@ inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, const #else /// Implementation for other platforms. -inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, const void* __restrict src, - size_t n) { +inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, + const void* __restrict src, size_t n) { memcpy(dst, src, n); } diff --git a/be/src/vec/common/mremap.cpp b/be/src/vec/common/mremap.cpp index 58d25f9131..4a446788d5 100644 --- a/be/src/vec/common/mremap.cpp +++ b/be/src/vec/common/mremap.cpp @@ -25,27 +25,22 @@ #include "vec/common/mremap.h" -void * mremap_fallback(void * old_address, size_t old_size, size_t new_size, - int flags, int mmap_prot, int mmap_flags, int mmap_fd, off_t mmap_offset) -{ +void* mremap_fallback(void* old_address, size_t old_size, size_t new_size, int flags, int mmap_prot, + int mmap_flags, int mmap_fd, off_t mmap_offset) { /// No actual shrink - if (new_size < old_size) - return old_address; + if (new_size < old_size) return old_address; - if (!(flags & MREMAP_MAYMOVE)) - { + if (!(flags & MREMAP_MAYMOVE)) { errno = ENOMEM; return MAP_FAILED; } - void * new_address = mmap(nullptr, new_size, mmap_prot, mmap_flags, mmap_fd, mmap_offset); - if (MAP_FAILED == new_address) - return MAP_FAILED; + void* new_address = mmap(nullptr, new_size, mmap_prot, mmap_flags, mmap_fd, mmap_offset); + if (MAP_FAILED == new_address) return MAP_FAILED; memcpy(new_address, old_address, old_size); - if (munmap(old_address, old_size)) - abort(); + if (munmap(old_address, old_size)) abort(); return new_address; } diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index d6419fdb8f..b1148e2887 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -368,7 +368,7 @@ public: void add_num_element(U&& x, uint32_t num, TAllocatorParams&&... allocator_params) { if (num != 0) { const auto new_end = this->c_end + this->byte_size(num); - if (UNLIKELY( new_end > this->c_end_of_storage)) { + if (UNLIKELY(new_end > this->c_end_of_storage)) { this->reserve(this->size() + num); } std::fill(t_end(), t_end() + num, x); @@ -377,7 +377,8 @@ public: } template - void add_num_element_without_reserve(U&& x, uint32_t num, TAllocatorParams&&... allocator_params) { + void add_num_element_without_reserve(U&& x, uint32_t num, + TAllocatorParams&&... allocator_params) { std::fill(t_end(), t_end() + num, x); this->c_end += sizeof(T) * num; } diff --git a/be/src/vec/common/radix_sort.h b/be/src/vec/common/radix_sort.h index 13a8ad1f8b..3041a5756a 100644 --- a/be/src/vec/common/radix_sort.h +++ b/be/src/vec/common/radix_sort.h @@ -191,8 +191,8 @@ private: Element* j; Element tmp = *i; *i = *(i - 1); - for (j = i - 1; - j > arr && Traits::less(Traits::extract_key(tmp), Traits::extract_key(*(j - 1))); + for (j = i - 1; j > arr && Traits::less(Traits::extract_key(tmp), + Traits::extract_key(*(j - 1))); --j) *j = *(j - 1); *j = tmp; @@ -330,8 +330,8 @@ public: /// On the last pass, we do the reverse transformation. if (!Traits::Transform::transform_is_simple && pass == NUM_PASSES - 1) - Traits::extract_key(dest) = bits_to_key( - Traits::Transform::backward(key_to_bits(Traits::extract_key(reader[i])))); + Traits::extract_key(dest) = bits_to_key(Traits::Transform::backward( + key_to_bits(Traits::extract_key(reader[i])))); } } diff --git a/be/src/vec/common/string_buffer.hpp b/be/src/vec/common/string_buffer.hpp index d0de99cdad..b712871b5a 100644 --- a/be/src/vec/common/string_buffer.hpp +++ b/be/src/vec/common/string_buffer.hpp @@ -40,8 +40,8 @@ public: class VectorBufferWriter final : public BufferWritable { public: - explicit VectorBufferWriter(ColumnString& vector) : - _data(vector.get_chars()), _offsets(vector.get_offsets()) {} + explicit VectorBufferWriter(ColumnString& vector) + : _data(vector.get_chars()), _offsets(vector.get_offsets()) {} void write(const char* data, int len) override { _data.insert(data, data + len); @@ -54,9 +54,7 @@ public: _now_offset = 0; } - ~VectorBufferWriter() { - DCHECK(_now_offset == 0); - } + ~VectorBufferWriter() { DCHECK(_now_offset == 0); } private: ColumnString::Chars& _data; diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h index 736d44770e..6924c1eeca 100644 --- a/be/src/vec/core/accurate_comparison.h +++ b/be/src/vec/core/accurate_comparison.h @@ -489,9 +489,7 @@ struct EqualsOp { template <> struct EqualsOp { - static UInt8 apply(const Int128& a, const Int128& b) { - return a == b; - } + static UInt8 apply(const Int128& a, const Int128& b) { return a == b; } }; template @@ -502,9 +500,7 @@ struct NotEqualsOp { template <> struct NotEqualsOp { - static UInt8 apply(const Int128& a, const Int128& b) { - return a != b; - } + static UInt8 apply(const Int128& a, const Int128& b) { return a != b; } }; template @@ -525,9 +521,7 @@ struct LessOp { template <> struct LessOp { - static UInt8 apply(StringRef a, StringRef b) { - return a < b; - } + static UInt8 apply(StringRef a, StringRef b) { return a < b; } }; template @@ -545,9 +539,7 @@ struct GreaterOp { template <> struct GreaterOp { - static UInt8 apply(StringRef a, StringRef b) { - return a > b; - } + static UInt8 apply(StringRef a, StringRef b) { return a > b; } }; template diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 90d3984ee0..8d5880531a 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -947,18 +947,19 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector& char_ if (idx < data.size()) { if (this->get_by_position(idx).column->is_nullable()) { this->get_by_position(idx).column = ColumnNullable::create( - reinterpret_cast( - reinterpret_cast( + reinterpret_cast( + reinterpret_cast( this->get_by_position(idx).column.get()) ->get_nested_column_ptr() .get()) ->get_shinked_column(), - reinterpret_cast(this->get_by_position(idx).column.get()) + reinterpret_cast( + this->get_by_position(idx).column.get()) ->get_null_map_column_ptr()); } else { - this->get_by_position(idx).column = - reinterpret_cast(this->get_by_position(idx).column.get()) - ->get_shinked_column(); + this->get_by_position(idx).column = reinterpret_cast( + this->get_by_position(idx).column.get()) + ->get_shinked_column(); } } } diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index ed5860dd5a..375ef6906f 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -358,10 +358,14 @@ public: DataTypes& data_types() { return _data_types; } MutableColumnPtr& get_column_by_position(size_t position) { return _columns[position]; } - const MutableColumnPtr& get_column_by_position(size_t position) const { return _columns[position]; } + const MutableColumnPtr& get_column_by_position(size_t position) const { + return _columns[position]; + } DataTypePtr& get_datatype_by_position(size_t position) { return _data_types[position]; } - const DataTypePtr& get_datatype_by_position(size_t position) const { return _data_types[position]; } + const DataTypePtr& get_datatype_by_position(size_t position) const { + return _data_types[position]; + } int compare_at(size_t n, size_t m, size_t num_columns, const MutableBlock& rhs, int nan_direction_hint) const { diff --git a/be/src/vec/core/block_info.h b/be/src/vec/core/block_info.h index 5912b7b053..2ab7e2d5ef 100644 --- a/be/src/vec/core/block_info.h +++ b/be/src/vec/core/block_info.h @@ -26,7 +26,6 @@ namespace doris::vectorized { - /** More information about the block. */ struct BlockInfo { @@ -51,7 +50,6 @@ struct BlockInfo { APPLY_FOR_BLOCK_INFO_FIELDS(DECLARE_FIELD_VEC) #undef DECLARE_FIELD_VEC - }; /// Block extention to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults. diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp index 4bb3e2c91b..81684a2da5 100644 --- a/be/src/vec/core/field.cpp +++ b/be/src/vec/core/field.cpp @@ -115,8 +115,7 @@ void write_binary(const Array& x, BufferWritable& buf) { break; } } - } - ; + }; } template <> diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h index 0db281b6f2..bbdbcdb783 100644 --- a/be/src/vec/core/sort_block.h +++ b/be/src/vec/core/sort_block.h @@ -23,33 +23,32 @@ #include "vec/core/block.h" #include "vec/core/sort_description.h" -namespace doris::vectorized -{ +namespace doris::vectorized { /// Sort one block by `description`. If limit != 0, then the partial sort of the first `limit` rows is produced. -void sort_block(Block & block, const SortDescription & description, UInt64 limit = 0); - +void sort_block(Block& block, const SortDescription& description, UInt64 limit = 0); /** Used only in StorageMergeTree to sort the data with INSERT. * Sorting is stable. This is important for keeping the order of rows in the CollapsingMergeTree engine * - because based on the order of rows it is determined whether to delete or leave groups of rows when collapsing. * Collations are not supported. Partial sorting is not supported. */ -void stable_sort_block(Block & block, const SortDescription & description); +void stable_sort_block(Block& block, const SortDescription& description); /** Same as stable_sort_block, but do not sort the block, but only calculate the permutation of the values, * so that you can rearrange the column values yourself. */ -void stable_get_permutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation); - +void stable_get_permutation(const Block& block, const SortDescription& description, + IColumn::Permutation& out_permutation); /** Quickly check whether the block is already sorted. If the block is not sorted - returns false as fast as possible. * Collations are not supported. */ -bool is_already_sorted(const Block & block, const SortDescription & description); +bool is_already_sorted(const Block& block, const SortDescription& description); -using ColumnsWithSortDescriptions = std::vector>; +using ColumnsWithSortDescriptions = std::vector>; -ColumnsWithSortDescriptions get_columns_with_sort_description(const Block & block, const SortDescription & description); +ColumnsWithSortDescriptions get_columns_with_sort_description(const Block& block, + const SortDescription& description); -} +} // namespace doris::vectorized diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h index 6a505c86d8..0dffb7454b 100644 --- a/be/src/vec/core/sort_cursor.h +++ b/be/src/vec/core/sort_cursor.h @@ -149,7 +149,7 @@ struct ReceiveQueueSortCursorImpl : public SortCursorImpl { const std::vector& _ordering_expr; Block* _block_ptr = nullptr; - BlockSupplier _block_supplier{}; + BlockSupplier _block_supplier {}; bool _is_eof = false; }; diff --git a/be/src/vec/core/sort_description.h b/be/src/vec/core/sort_description.h index 3a4fbca140..b84c4fbdfb 100644 --- a/be/src/vec/core/sort_description.h +++ b/be/src/vec/core/sort_description.h @@ -28,53 +28,50 @@ class Collator; -namespace doris::vectorized -{ +namespace doris::vectorized { -struct FillColumnDescription -{ +struct FillColumnDescription { /// All missed values in range [FROM, TO) will be filled /// Range [FROM, TO) respects sorting direction - Field fill_from; /// Fill value >= FILL_FROM - Field fill_to; /// Fill value + STEP < FILL_TO - Field fill_step; /// Default = 1 or -1 according to direction + Field fill_from; /// Fill value >= FILL_FROM + Field fill_to; /// Fill value + STEP < FILL_TO + Field fill_step; /// Default = 1 or -1 according to direction }; /// Description of the sorting rule by one column. -struct SortColumnDescription -{ +struct SortColumnDescription { std::string column_name; /// The name of the column. - int column_number; /// Column number (used if no name is given). + int column_number; /// Column number (used if no name is given). int direction; /// 1 - ascending, -1 - descending. int nulls_direction; /// 1 - NULLs and NaNs are greater, -1 - less. - /// To achieve NULLS LAST, set it equal to direction, to achieve NULLS FIRST, set it opposite. - std::shared_ptr collator = nullptr; /// Collator for locale-specific comparison of strings + /// To achieve NULLS LAST, set it equal to direction, to achieve NULLS FIRST, set it opposite. + std::shared_ptr collator = + nullptr; /// Collator for locale-specific comparison of strings bool with_fill = false; FillColumnDescription fill_description = {}; - - SortColumnDescription( - int column_number_, int direction_, int nulls_direction_, - const std::shared_ptr & collator_ = nullptr, bool with_fill_ = false, - const FillColumnDescription & fill_description_ = {}) - : column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_) - , with_fill(with_fill_), fill_description(fill_description_) {} + SortColumnDescription(int column_number_, int direction_, int nulls_direction_, + const std::shared_ptr& collator_ = nullptr, + bool with_fill_ = false, + const FillColumnDescription& fill_description_ = {}) + : column_number(column_number_), + direction(direction_), + nulls_direction(nulls_direction_), + collator(collator_), + with_fill(with_fill_), + fill_description(fill_description_) {} SortColumnDescription() {} - bool operator == (const SortColumnDescription & other) const - { - return column_name == other.column_name && column_number == other.column_number - && direction == other.direction && nulls_direction == other.nulls_direction; + bool operator==(const SortColumnDescription& other) const { + return column_name == other.column_name && column_number == other.column_number && + direction == other.direction && nulls_direction == other.nulls_direction; } - bool operator != (const SortColumnDescription & other) const - { - return !(*this == other); - } + bool operator!=(const SortColumnDescription& other) const { return !(*this == other); } }; /// Description of the sorting rule for several columns. using SortDescription = std::vector; -} +} // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp index d7d9d6d827..1f30b48aae 100644 --- a/be/src/vec/data_types/data_type_array.cpp +++ b/be/src/vec/data_types/data_type_array.cpp @@ -26,12 +26,10 @@ namespace doris::vectorized { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -DataTypeArray::DataTypeArray(const DataTypePtr & nested_) - : nested{nested_} { -} +DataTypeArray::DataTypeArray(const DataTypePtr& nested_) : nested {nested_} {} MutableColumnPtr DataTypeArray::create_column() const { return ColumnArray::create(nested->create_column(), ColumnArray::ColumnOffsets::create()); @@ -41,22 +39,24 @@ Field DataTypeArray::get_default() const { return Array(); } -bool DataTypeArray::equals(const IDataType & rhs) const { - return typeid(rhs) == typeid(*this) && nested->equals(*static_cast(rhs).nested); +bool DataTypeArray::equals(const IDataType& rhs) const { + return typeid(rhs) == typeid(*this) && + nested->equals(*static_cast(rhs).nested); } size_t DataTypeArray::get_number_of_dimensions() const { - const DataTypeArray * nested_array = typeid_cast(nested.get()); - if (!nested_array) - return 1; - return 1 + nested_array->get_number_of_dimensions(); /// Every modern C++ compiler optimizes tail recursion. + const DataTypeArray* nested_array = typeid_cast(nested.get()); + if (!nested_array) return 1; + return 1 + + nested_array + ->get_number_of_dimensions(); /// Every modern C++ compiler optimizes tail recursion. } int64_t DataTypeArray::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast(*ptr.get()); return sizeof(IColumn::Offset) * (column.size() + 1) + - get_nested_type()->get_uncompressed_serialized_bytes(data_column.get_data()); + get_nested_type()->get_uncompressed_serialized_bytes(data_column.get_data()); } char* DataTypeArray::serialize(const IColumn& column, char* buf) const { diff --git a/be/src/vec/data_types/data_type_array.h b/be/src/vec/data_types/data_type_array.h index a389bdabaa..5b67ac68ce 100644 --- a/be/src/vec/data_types/data_type_array.h +++ b/be/src/vec/data_types/data_type_array.h @@ -33,13 +33,13 @@ private: public: static constexpr bool is_parametric = true; - DataTypeArray(const DataTypePtr & nested_); + DataTypeArray(const DataTypePtr& nested_); TypeIndex get_type_id() const override { return TypeIndex::Array; } std::string do_get_name() const override { return "Array(" + nested->get_name() + ")"; } - const char * get_family_name() const override { return "Array"; } + const char* get_family_name() const override { return "Array"; } bool can_be_inside_nullable() const override { return true; } @@ -47,14 +47,20 @@ public: Field get_default() const override; - bool equals(const IDataType & rhs) const override; + bool equals(const IDataType& rhs) const override; bool get_is_parametric() const override { return true; } bool have_subtypes() const override { return true; } - bool cannot_be_stored_in_tables() const override { return nested->cannot_be_stored_in_tables(); } - bool text_can_contain_only_valid_utf8() const override { return nested->text_can_contain_only_valid_utf8(); } + bool cannot_be_stored_in_tables() const override { + return nested->cannot_be_stored_in_tables(); + } + bool text_can_contain_only_valid_utf8() const override { + return nested->text_can_contain_only_valid_utf8(); + } bool is_comparable() const override { return nested->is_comparable(); } - bool can_be_compared_with_collation() const override { return nested->can_be_compared_with_collation(); } + bool can_be_compared_with_collation() const override { + return nested->can_be_compared_with_collation(); + } bool is_value_unambiguously_represented_in_contiguous_memory_region() const override { return nested->is_value_unambiguously_represented_in_contiguous_memory_region(); @@ -62,7 +68,7 @@ public: //SerializationPtr doGetDefaultSerialization() const override; - const DataTypePtr & get_nested_type() const { return nested; } + const DataTypePtr& get_nested_type() const { return nested; } /// 1 for plain array, 2 for array of arrays and so on. size_t get_number_of_dimensions() const; diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp index 97f34ac59a..cb1a2f2e45 100644 --- a/be/src/vec/data_types/data_type_bitmap.cpp +++ b/be/src/vec/data_types/data_type_bitmap.cpp @@ -77,7 +77,7 @@ const char* DataTypeBitMap::deserialize(const char* buf, IColumn* column) const buf += sizeof(size_t) * row_num; // deserialize each bitmap data.resize(row_num); - for (int i = 0; i < row_num ; ++i) { + for (int i = 0; i < row_num; ++i) { data[i].deserialize(buf); buf += bitmap_size_array[i]; } @@ -105,8 +105,9 @@ void DataTypeBitMap::deserialize_as_stream(BitmapValue& value, BufferReadable& b } void DataTypeBitMap::to_string(const class doris::vectorized::IColumn& column, size_t row_num, - doris::vectorized::BufferWritable& ostr) const { - auto& data = const_cast(assert_cast(column).get_element(row_num)); + doris::vectorized::BufferWritable& ostr) const { + auto& data = + const_cast(assert_cast(column).get_element(row_num)); std::string result(data.getSizeInBytes(), '0'); data.write((char*)result.data()); diff --git a/be/src/vec/data_types/data_type_date.cpp b/be/src/vec/data_types/data_type_date.cpp index 8588e01cc0..0dcb3e9b82 100644 --- a/be/src/vec/data_types/data_type_date.cpp +++ b/be/src/vec/data_types/data_type_date.cpp @@ -31,8 +31,9 @@ bool DataTypeDate::equals(const IDataType& rhs) const { std::string DataTypeDate::to_string(const IColumn& column, size_t row_num) const { Int64 int_val = assert_cast(*column.convert_to_full_column_if_const().get()) - .get_data()[row_num]; - doris::vectorized::VecDateTimeValue value = binary_cast(int_val); + .get_data()[row_num]; + doris::vectorized::VecDateTimeValue value = + binary_cast(int_val); std::stringstream ss; // Year uint32_t temp = value.year() / 100; @@ -46,10 +47,11 @@ std::string DataTypeDate::to_string(const IColumn& column, size_t row_num) const return ss.str(); } -void DataTypeDate::to_string(const IColumn & column, size_t row_num, BufferWritable & ostr) const { +void DataTypeDate::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { Int64 int_val = assert_cast(*column.convert_to_full_column_if_const().get()) - .get_data()[row_num]; - doris::vectorized::VecDateTimeValue value = binary_cast(int_val); + .get_data()[row_num]; + doris::vectorized::VecDateTimeValue value = + binary_cast(int_val); char buf[64]; char* pos = value.to_string(buf); diff --git a/be/src/vec/data_types/data_type_date.h b/be/src/vec/data_types/data_type_date.h index afc67eded6..160b7d4a0c 100644 --- a/be/src/vec/data_types/data_type_date.h +++ b/be/src/vec/data_types/data_type_date.h @@ -28,7 +28,7 @@ class DataTypeDate final : public DataTypeNumberBase { public: TypeIndex get_type_id() const override { return TypeIndex::Date; } const char* get_family_name() const override { return "DateTime"; } - std::string do_get_name() const override {return "Date"; } + std::string do_get_name() const override { return "Date"; } bool can_be_used_as_version() const override { return true; } bool can_be_inside_nullable() const override { return true; } diff --git a/be/src/vec/data_types/data_type_date_time.cpp b/be/src/vec/data_types/data_type_date_time.cpp index 4b2619439a..5cb568b3ce 100644 --- a/be/src/vec/data_types/data_type_date_time.cpp +++ b/be/src/vec/data_types/data_type_date_time.cpp @@ -33,11 +33,11 @@ bool DataTypeDateTime::equals(const IDataType& rhs) const { } std::string DataTypeDateTime::to_string(const IColumn& column, size_t row_num) const { - Int64 int_val = - assert_cast(*column.convert_to_full_column_if_const().get()) - .get_data()[row_num]; + Int64 int_val = assert_cast(*column.convert_to_full_column_if_const().get()) + .get_data()[row_num]; // TODO: Rethink we really need to do copy replace const reference here? - doris::vectorized::VecDateTimeValue value = binary_cast(int_val); + doris::vectorized::VecDateTimeValue value = + binary_cast(int_val); std::stringstream ss; // Year @@ -70,11 +70,11 @@ std::string DataTypeDateTime::to_string(const IColumn& column, size_t row_num) c void DataTypeDateTime::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const { - Int64 int_val = - assert_cast(*column.convert_to_full_column_if_const().get()) - .get_data()[row_num]; + Int64 int_val = assert_cast(*column.convert_to_full_column_if_const().get()) + .get_data()[row_num]; // TODO: Rethink we really need to do copy replace const reference here? - doris::vectorized::VecDateTimeValue value = binary_cast(int_val); + doris::vectorized::VecDateTimeValue value = + binary_cast(int_val); char buf[64]; char* pos = value.to_string(buf); diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h index 6f2ef2c3f9..a429e2d0f2 100644 --- a/be/src/vec/data_types/data_type_date_time.h +++ b/be/src/vec/data_types/data_type_date_time.h @@ -52,7 +52,7 @@ public: DataTypeDateTime(); const char* get_family_name() const override { return "DateTime"; } - std::string do_get_name() const override {return "DateTime"; } + std::string do_get_name() const override { return "DateTime"; } TypeIndex get_type_id() const override { return TypeIndex::DateTime; } bool can_be_used_as_version() const override { return true; } diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 29b00a6c7f..555c6417ce 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -163,7 +163,7 @@ DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type) break; case OLAP_FIELD_TYPE_HLL: result = std::make_shared(); - break; + break; case OLAP_FIELD_TYPE_OBJECT: result = std::make_shared(); break; diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp index 8c42fd60b9..4ecb290487 100644 --- a/be/src/vec/data_types/data_type_factory.hpp +++ b/be/src/vec/data_types/data_type_factory.hpp @@ -63,7 +63,7 @@ public: instance.register_data_type("DateTime", std::make_shared()); instance.register_data_type("String", std::make_shared()); instance.register_data_type("Decimal", - std::make_shared>(27, 9)); + std::make_shared>(27, 9)); }); return instance; } diff --git a/be/src/vec/data_types/data_type_nothing.h b/be/src/vec/data_types/data_type_nothing.h index 07bd71924f..8a7d1e63f3 100644 --- a/be/src/vec/data_types/data_type_nothing.h +++ b/be/src/vec/data_types/data_type_nothing.h @@ -57,7 +57,8 @@ public: } void insert_default_into(IColumn&) const override { - LOG(FATAL) << "Method insert_default_into() is not implemented for data type " << get_name(); + LOG(FATAL) << "Method insert_default_into() is not implemented for data type " + << get_name(); } bool have_subtypes() const override { return false; } diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index 52a7f0225f..5c3730b860 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -31,7 +31,7 @@ namespace doris::vectorized { DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_) - : nested_data_type{nested_data_type_} { + : nested_data_type {nested_data_type_} { if (!nested_data_type->can_be_inside_nullable()) { LOG(FATAL) << fmt::format("Nested type {} cannot be inside Nullable type", nested_data_type->get_name()); @@ -59,8 +59,9 @@ std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) c int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn& column) const { int64_t size = sizeof(uint32_t); size += sizeof(bool) * column.size(); - size += nested_data_type->get_uncompressed_serialized_bytes(assert_cast( - *column.convert_to_full_column_if_const()).get_nested_column()); + size += nested_data_type->get_uncompressed_serialized_bytes( + assert_cast(*column.convert_to_full_column_if_const()) + .get_nested_column()); return size; } diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index 8a63f9bf30..fe0518ad38 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -58,8 +58,8 @@ std::string DataTypeString::to_string(const IColumn& column, size_t row_num) con return s.to_string(); } -void DataTypeString::to_string(const class doris::vectorized::IColumn & column, size_t row_num, - class doris::vectorized::BufferWritable & ostr) const { +void DataTypeString::to_string(const class doris::vectorized::IColumn& column, size_t row_num, + class doris::vectorized::BufferWritable& ostr) const { const StringRef& s = assert_cast(*column.convert_to_full_column_if_const().get()) .get_data_at(row_num); @@ -84,7 +84,8 @@ bool DataTypeString::equals(const IDataType& rhs) const { int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast(*ptr.get()); - return sizeof(uint32_t) * (column.size() + 1) + sizeof(uint64_t) + data_column.get_chars().size(); + return sizeof(uint32_t) * (column.size() + 1) + sizeof(uint64_t) + + data_column.get_chars().size(); } char* DataTypeString::serialize(const IColumn& column, char* buf) const { @@ -121,7 +122,7 @@ const char* DataTypeString::deserialize(const char* buf, IColumn* column) const memcpy(offsets.data(), buf, sizeof(uint32_t) * row_num); buf += sizeof(uint32_t) * row_num; // total length - uint64_t value_len = *reinterpret_cast(buf); + uint64_t value_len = *reinterpret_cast(buf); buf += sizeof(uint64_t); // values data.resize(value_len); diff --git a/be/src/vec/data_types/number_traits.h b/be/src/vec/data_types/number_traits.h index 8405215a05..70830bfcd1 100644 --- a/be/src/vec/data_types/number_traits.h +++ b/be/src/vec/data_types/number_traits.h @@ -170,8 +170,8 @@ struct ResultOfIntegerDivision { */ template struct ResultOfModulo { - using Type = - typename Construct || std::is_signed_v, std::is_floating_point_v, max(sizeof(A), sizeof(B))>::Type; + using Type = typename Construct || std::is_signed_v, + std::is_floating_point_v, max(sizeof(A), sizeof(B))>::Type; }; template diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index c222a7a5b5..a42a8215ff 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -28,12 +28,11 @@ namespace doris::vectorized { -std::variant -static inline make_bool_variant(bool condition) { +std::variant static inline make_bool_variant(bool condition) { if (condition) { - return std::true_type{}; + return std::true_type {}; } else { - return std::false_type{}; + return std::false_type {}; } } @@ -174,7 +173,7 @@ struct ProcessHashTableProbe { // output build side result column template void build_side_output_column(MutableColumns& mcol, int column_offset, int column_length, - const std::vector& output_slot_flags, int size) { + const std::vector& output_slot_flags, int size) { constexpr auto is_semi_anti_join = JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN || JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN || JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN || @@ -188,8 +187,8 @@ struct ProcessHashTableProbe { for (int i = 0; i < column_length; i++) { auto& column = *_build_blocks[0].get_by_position(i).column; if (output_slot_flags[i]) { - mcol[i + column_offset]->insert_indices_from(column, _build_block_rows.data(), - _build_block_rows.data() + size); + mcol[i + column_offset]->insert_indices_from( + column, _build_block_rows.data(), _build_block_rows.data() + size); } else { mcol[i + column_offset]->resize(size); } @@ -201,14 +200,19 @@ struct ProcessHashTableProbe { if constexpr (probe_all) { if (_build_block_offsets[j] == -1) { DCHECK(mcol[i + column_offset]->is_nullable()); - assert_cast( - mcol[i + column_offset].get())->insert_join_null_data(); + assert_cast(mcol[i + column_offset].get()) + ->insert_join_null_data(); } else { - auto &column = *_build_blocks[_build_block_offsets[j]].get_by_position(i).column; - mcol[i + column_offset]->insert_from(column, _build_block_rows[j]); + auto& column = *_build_blocks[_build_block_offsets[j]] + .get_by_position(i) + .column; + mcol[i + column_offset]->insert_from(column, + _build_block_rows[j]); } } else { - auto &column = *_build_blocks[_build_block_offsets[j]].get_by_position(i).column; + auto& column = *_build_blocks[_build_block_offsets[j]] + .get_by_position(i) + .column; mcol[i + column_offset]->insert_from(column, _build_block_rows[j]); } } @@ -221,7 +225,8 @@ struct ProcessHashTableProbe { } // output probe side result column - void probe_side_output_column(MutableColumns& mcol, const std::vector& output_slot_flags, int size) { + void probe_side_output_column(MutableColumns& mcol, const std::vector& output_slot_flags, + int size) { for (int i = 0; i < output_slot_flags.size(); ++i) { if (output_slot_flags[i]) { auto& column = _probe_block.get_by_position(i).column; @@ -240,8 +245,8 @@ struct ProcessHashTableProbe { using KeyGetter = typename HashTableContext::State; using Mapped = typename HashTableContext::Mapped; - int right_col_idx = _join_node->_is_right_semi_anti ? 0 : - _join_node->_left_table_data_types.size(); + int right_col_idx = + _join_node->_is_right_semi_anti ? 0 : _join_node->_left_table_data_types.size(); int right_col_len = _join_node->_right_table_data_types.size(); KeyGetter key_getter(_probe_raw_ptrs, _join_node->_probe_key_sz, nullptr); @@ -254,15 +259,15 @@ struct ProcessHashTableProbe { memset(_items_counts.data(), 0, sizeof(uint32_t) * _probe_rows); constexpr auto need_to_set_visited = JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN || - JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN || - JoinOpType::value == TJoinOp::FULL_OUTER_JOIN; + JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN || + JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN || + JoinOpType::value == TJoinOp::FULL_OUTER_JOIN; constexpr auto is_right_semi_anti_join = JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN; + JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN; constexpr auto probe_all = JoinOpType::value == TJoinOp::LEFT_OUTER_JOIN || - JoinOpType::value == TJoinOp::FULL_OUTER_JOIN; + JoinOpType::value == TJoinOp::FULL_OUTER_JOIN; { SCOPED_TIMER(_search_hashtable_timer); @@ -275,9 +280,11 @@ struct ProcessHashTableProbe { } int last_offset = current_offset; auto find_result = (*null_map)[_probe_index] - ? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index, - _arena)) {nullptr, false} - : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena); + ? decltype(key_getter.find_key(hash_table_ctx.hash_table, + _probe_index, + _arena)) {nullptr, false} + : key_getter.find_key(hash_table_ctx.hash_table, + _probe_index, _arena); if constexpr (JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) { if (!find_result.is_found()) { @@ -293,8 +300,7 @@ struct ProcessHashTableProbe { // TODO: Iterators are currently considered to be a heavy operation and have a certain impact on performance. // We should rethink whether to use this iterator mode in the future. Now just opt the one row case if (mapped.get_row_count() == 1) { - if constexpr (need_to_set_visited) - mapped.visited = true; + if constexpr (need_to_set_visited) mapped.visited = true; if constexpr (!is_right_semi_anti_join) { _build_block_offsets[current_offset] = mapped.block_offset; @@ -304,7 +310,8 @@ struct ProcessHashTableProbe { } else { // prefetch is more useful while matching to multiple rows if (_probe_index + 2 < _probe_rows) - key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2, _arena); + key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2, + _arena); for (auto it = mapped.begin(); it.ok(); ++it) { if constexpr (!is_right_semi_anti_join) { @@ -317,8 +324,7 @@ struct ProcessHashTableProbe { } ++current_offset; } - if constexpr (need_to_set_visited) - it->visited = true; + if constexpr (need_to_set_visited) it->visited = true; } } } else { @@ -341,10 +347,11 @@ struct ProcessHashTableProbe { { SCOPED_TIMER(_build_side_output_timer); build_side_output_column(mcol, right_col_idx, right_col_len, - _join_node->_right_output_slot_flags, current_offset); + _join_node->_right_output_slot_flags, current_offset); } - if constexpr (JoinOpType::value != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType::value != TJoinOp::RIGHT_ANTI_JOIN) { + if constexpr (JoinOpType::value != TJoinOp::RIGHT_SEMI_JOIN && + JoinOpType::value != TJoinOp::RIGHT_ANTI_JOIN) { SCOPED_TIMER(_probe_side_output_timer); probe_side_output_column(mcol, _join_node->_left_output_slot_flags, current_offset); } @@ -449,7 +456,7 @@ struct ProcessHashTableProbe { { SCOPED_TIMER(_build_side_output_timer); build_side_output_column(mcol, right_col_idx, right_col_len, - _join_node->_right_output_slot_flags, current_offset); + _join_node->_right_output_slot_flags, current_offset); } { SCOPED_TIMER(_probe_side_output_timer); @@ -524,9 +531,11 @@ struct ProcessHashTableProbe { auto new_filter_column = ColumnVector::create(); auto& filter_map = new_filter_column->get_data(); - if (!column->empty()) filter_map.emplace_back(column->get_bool(0) && visited_map[0]); + if (!column->empty()) + filter_map.emplace_back(column->get_bool(0) && visited_map[0]); for (int i = 1; i < column->size(); ++i) { - if ((visited_map[i] && column->get_bool(i)) || (same_to_prev[i] && filter_map[i - 1])) { + if ((visited_map[i] && column->get_bool(i)) || + (same_to_prev[i] && filter_map[i - 1])) { filter_map.push_back(true); filter_map[i - 1] = !same_to_prev[i] && filter_map[i - 1]; } else { @@ -557,7 +566,8 @@ struct ProcessHashTableProbe { output_block->clear(); } else { if constexpr (JoinOpType::value == TJoinOp::LEFT_SEMI_JOIN || - JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) orig_columns = right_col_idx; + JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) + orig_columns = right_col_idx; Block::filter_block(output_block, result_column_id, orig_columns); } } @@ -572,8 +582,8 @@ struct ProcessHashTableProbe { hash_table_ctx.init_once(); auto& mcol = mutable_block.mutable_columns(); - int right_col_idx = _join_node->_is_right_semi_anti ? 0 : - _join_node->_left_table_data_types.size(); + int right_col_idx = + _join_node->_is_right_semi_anti ? 0 : _join_node->_left_table_data_types.size(); int right_col_len = _join_node->_right_table_data_types.size(); auto& iter = hash_table_ctx.iter; @@ -591,11 +601,9 @@ struct ProcessHashTableProbe { auto& mapped = iter->get_second(); for (auto it = mapped.begin(); it.ok(); ++it) { if constexpr (JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN) { - if (it->visited) - insert_from_hash_table(it->block_offset, it->row_num); + if (it->visited) insert_from_hash_table(it->block_offset, it->row_num); } else { - if (!it->visited) - insert_from_hash_table(it->block_offset, it->row_num); + if (!it->visited) insert_from_hash_table(it->block_offset, it->row_num); } } } @@ -606,7 +614,7 @@ struct ProcessHashTableProbe { JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) { for (int i = 0; i < right_col_idx; ++i) { for (int j = 0; j < block_size; ++j) { - assert_cast(mcol[i].get())->insert_join_null_data(); + assert_cast(mcol[i].get())->insert_join_null_data(); } } } @@ -650,8 +658,9 @@ HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const Descr _is_right_semi_anti(_join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN), _is_outer_join(_match_all_build || _match_all_probe), - _hash_output_slot_ids(tnode.hash_join_node.__isset.hash_output_slot_ids ? tnode.hash_join_node.hash_output_slot_ids : - std::vector{}) { + _hash_output_slot_ids(tnode.hash_join_node.__isset.hash_output_slot_ids + ? tnode.hash_join_node.hash_output_slot_ids + : std::vector {}) { _runtime_filter_descs = tnode.runtime_filters; init_join_op(); @@ -665,15 +674,15 @@ HashJoinNode::~HashJoinNode() = default; void HashJoinNode::init_join_op() { switch (_join_op) { -#define M(NAME) \ - case TJoinOp::NAME: \ - _join_op_variants.emplace>(); \ - break; +#define M(NAME) \ + case TJoinOp::NAME: \ + _join_op_variants.emplace>(); \ + break; APPLY_FOR_JOINOP_VARIANTS(M); #undef M - default: - //do nothing - break; + default: + //do nothing + break; } return; } @@ -725,8 +734,8 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { } for (const auto& filter_desc : _runtime_filter_descs) { - RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter(RuntimeFilterRole::PRODUCER, - filter_desc, state->query_options())); + RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter( + RuntimeFilterRole::PRODUCER, filter_desc, state->query_options())); } // init left/right output slots flags, only column of slot_id in _hash_output_slot_ids need @@ -737,9 +746,10 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { auto init_output_slots_flags = [this](auto& tuple_descs, auto& output_slot_flags) { for (const auto& tuple_desc : tuple_descs) { for (const auto& slot_desc : tuple_desc->slots()) { - output_slot_flags.emplace_back(_hash_output_slot_ids.empty() || - std::find(_hash_output_slot_ids.begin(), _hash_output_slot_ids.end(), - slot_desc->id()) != _hash_output_slot_ids.end()); + output_slot_flags.emplace_back( + _hash_output_slot_ids.empty() || + std::find(_hash_output_slot_ids.begin(), _hash_output_slot_ids.end(), + slot_desc->id()) != _hash_output_slot_ids.end()); } } }; @@ -875,38 +885,44 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo if (_probe_index < _probe_block.rows()) { std::visit( - [&](auto&& arg, auto&& join_op_variants, auto have_other_join_conjunct, auto probe_ignore_null) { - using HashTableCtxType = std::decay_t; - using JoinOpType = std::decay_t; - if constexpr (have_other_join_conjunct) { - MutableBlock mutable_block(VectorizedUtils::create_empty_columnswithtypename( - _row_desc_for_other_join_conjunt)); + [&](auto&& arg, auto&& join_op_variants, auto have_other_join_conjunct, + auto probe_ignore_null) { + using HashTableCtxType = std::decay_t; + using JoinOpType = std::decay_t; + if constexpr (have_other_join_conjunct) { + MutableBlock mutable_block( + VectorizedUtils::create_empty_columnswithtypename( + _row_desc_for_other_join_conjunt)); - if constexpr (!std::is_same_v) { - ProcessHashTableProbe process_hashtable_ctx( - this, state->batch_size(), probe_rows); - st = process_hashtable_ctx.do_process_with_other_join_conjunts( - arg, &_null_map_column->get_data(), - mutable_block, output_block); + if constexpr (!std::is_same_v) { + ProcessHashTableProbe + process_hashtable_ctx(this, state->batch_size(), probe_rows); + st = process_hashtable_ctx.do_process_with_other_join_conjunts( + arg, &_null_map_column->get_data(), mutable_block, + output_block); + } else { + LOG(FATAL) << "FATAL: uninited hash table"; + } } else { - LOG(FATAL) << "FATAL: uninited hash table"; - } - } else { - MutableBlock mutable_block = output_block->mem_reuse() ? MutableBlock(output_block) : - MutableBlock(VectorizedUtils::create_empty_columnswithtypename(row_desc())); + MutableBlock mutable_block = + output_block->mem_reuse() + ? MutableBlock(output_block) + : MutableBlock( + VectorizedUtils::create_empty_columnswithtypename( + row_desc())); - if constexpr (!std::is_same_v) { - ProcessHashTableProbe process_hashtable_ctx( - this, state->batch_size(), probe_rows); - st = process_hashtable_ctx.do_process( - arg, &_null_map_column->get_data(), - mutable_block, output_block); - } else { - LOG(FATAL) << "FATAL: uninited hash table"; + if constexpr (!std::is_same_v) { + ProcessHashTableProbe + process_hashtable_ctx(this, state->batch_size(), probe_rows); + st = process_hashtable_ctx.do_process(arg, + &_null_map_column->get_data(), + mutable_block, output_block); + } else { + LOG(FATAL) << "FATAL: uninited hash table"; + } } - } - }, _hash_table_variants, - _join_op_variants, + }, + _hash_table_variants, _join_op_variants, make_bool_variant(_have_other_join_conjunct), make_bool_variant(_probe_ignore_null)); } else if (_probe_eos) { @@ -918,16 +934,15 @@ Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eo using JoinOpType = std::decay_t; using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - ProcessHashTableProbe process_hashtable_ctx( - this, state->batch_size(), probe_rows); + ProcessHashTableProbe + process_hashtable_ctx(this, state->batch_size(), probe_rows); st = process_hashtable_ctx.process_data_in_hashtable(arg, mutable_block, output_block, eos); } else { LOG(FATAL) << "FATAL: uninited hash table"; } }, - _hash_table_variants, - _join_op_variants); + _hash_table_variants, _join_op_variants); } else { *eos = true; return Status::OK(); @@ -981,10 +996,12 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) { _hash_table_mem_tracker->consume(block.allocated_bytes()); _mem_used += block.allocated_bytes(); - if (block.rows() != 0) { mutable_block.merge(block); } + if (block.rows() != 0) { + mutable_block.merge(block); + } // make one block for each 4 gigabytes - constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL; + constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL; if (_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE) { _build_blocks.emplace_back(mutable_block.to_block()); // TODO:: Rethink may we should do the proess after we recevie all build blocks ? @@ -1028,7 +1045,7 @@ Status HashJoinNode::extract_build_join_column(Block& block, NullMap& null_map, // TODO: opt the column is const block.get_by_position(result_col_id).column = block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); - + if (_is_null_safe_eq_join[i]) { raw_ptrs[i] = block.get_by_position(result_col_id).column.get(); } else { @@ -1117,8 +1134,8 @@ Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block, uin [&](auto&& arg) -> Status { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - return extract_build_join_column(block, null_map_val, raw_ptrs, - has_null, *_build_expr_call_timer); + return extract_build_join_column(block, null_map_val, raw_ptrs, has_null, + *_build_expr_call_timer); } else { LOG(FATAL) << "FATAL: uninited hash table"; } @@ -1134,7 +1151,7 @@ Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block, uin if constexpr (!std::is_same_v) { #define CALL_BUILD_FUNCTION(HAS_NULL, BUILD_UNIQUE) \ ProcessHashTableBuild hash_table_build_process( \ - rows, block, raw_ptrs, this, state->batch_size(), offset); \ + rows, block, raw_ptrs, this, state->batch_size(), offset); \ st = hash_table_build_process(arg, &null_map_val, has_runtime_filter); if (std::pair {has_null, _build_unique} == std::pair {true, true}) { CALL_BUILD_FUNCTION(true, true); diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 5e02e36e50..893652bb4e 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -115,29 +115,30 @@ using HashTableVariants = I128FixedKeyHashTableContext, I256FixedKeyHashTableContext, I256FixedKeyHashTableContext>; -using JoinOpVariants = std::variant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant, - std::integral_constant>; +using JoinOpVariants = + std::variant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant, + std::integral_constant>; -#define APPLY_FOR_JOINOP_VARIANTS(M) \ - M(INNER_JOIN) \ - M(LEFT_SEMI_JOIN) \ - M(LEFT_ANTI_JOIN) \ - M(LEFT_OUTER_JOIN) \ - M(FULL_OUTER_JOIN) \ - M(RIGHT_OUTER_JOIN) \ - M(CROSS_JOIN) \ - M(MERGE_JOIN) \ - M(RIGHT_SEMI_JOIN) \ - M(RIGHT_ANTI_JOIN) \ +#define APPLY_FOR_JOINOP_VARIANTS(M) \ + M(INNER_JOIN) \ + M(LEFT_SEMI_JOIN) \ + M(LEFT_ANTI_JOIN) \ + M(LEFT_OUTER_JOIN) \ + M(FULL_OUTER_JOIN) \ + M(RIGHT_OUTER_JOIN) \ + M(CROSS_JOIN) \ + M(MERGE_JOIN) \ + M(RIGHT_SEMI_JOIN) \ + M(RIGHT_ANTI_JOIN) \ M(NULL_AWARE_LEFT_ANTI_JOIN) class VExprContext; @@ -236,6 +237,7 @@ private: std::vector _hash_output_slot_ids; std::vector _left_output_slot_flags; std::vector _right_output_slot_flags; + private: Status _hash_table_build(RuntimeState* state); Status _process_build_block(RuntimeState* state, Block& block, uint8_t offset); diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index 0942e24758..e7ff52f1d6 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -210,7 +210,8 @@ Status AggregationNode::prepare(RuntimeState* state) { _merge_timer = ADD_TIMER(runtime_profile(), "MergeTime"); _expr_timer = ADD_TIMER(runtime_profile(), "ExprTime"); _get_results_timer = ADD_TIMER(runtime_profile(), "GetResultsTime"); - _data_mem_tracker = MemTracker::create_virtual_tracker(-1, "AggregationNode:Data", mem_tracker()); + _data_mem_tracker = + MemTracker::create_virtual_tracker(-1, "AggregationNode:Data", mem_tracker()); _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size()); @@ -551,7 +552,8 @@ Status AggregationNode::_merge_without_key(Block* block) { } } else { _aggregate_evaluators[i]->execute_single_add( - block, _agg_data.without_key + _offsets_of_aggregate_states[i], &_agg_arena_pool); + block, _agg_data.without_key + _offsets_of_aggregate_states[i], + &_agg_arena_pool); } } return Status::OK(); @@ -570,10 +572,8 @@ void AggregationNode::_close_without_key() { void AggregationNode::_make_nullable_output_key(Block* block) { if (block->rows() != 0) { for (auto cid : _make_nullable_keys) { - block->get_by_position(cid).column = - make_nullable(block->get_by_position(cid).column); - block->get_by_position(cid).type = - make_nullable(block->get_by_position(cid).type); + block->get_by_position(cid).column = make_nullable(block->get_by_position(cid).column); + block->get_by_position(cid).type = make_nullable(block->get_by_position(cid).type); } } } @@ -680,8 +680,8 @@ Status AggregationNode::_pre_agg_with_serialized_key(doris::vectorized::Block* i for (int i = 0; i < _aggregate_evaluators.size(); ++i) { _aggregate_evaluators[i]->execute_batch_add( - in_block, _offsets_of_aggregate_states[i], _streaming_pre_places.data(), - &_agg_arena_pool); + in_block, _offsets_of_aggregate_states[i], + _streaming_pre_places.data(), &_agg_arena_pool); } // will serialize value data to string column @@ -738,7 +738,7 @@ Status AggregationNode::_pre_agg_with_serialized_key(doris::vectorized::Block* i if (!ret_flag) { std::visit( - [&](auto &&agg_method) -> void { + [&](auto&& agg_method) -> void { using HashMethodType = std::decay_t; using AggState = typename HashMethodType::State; AggState state(key_columns, _probe_key_sz, nullptr); @@ -746,7 +746,8 @@ Status AggregationNode::_pre_agg_with_serialized_key(doris::vectorized::Block* i for (size_t i = 0; i < rows; ++i) { AggregateDataPtr aggregate_data = nullptr; - auto emplace_result = state.emplace_key(agg_method.data, i, _agg_arena_pool); + auto emplace_result = + state.emplace_key(agg_method.data, i, _agg_arena_pool); /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key. if (emplace_result.is_inserted()) { @@ -989,7 +990,8 @@ Status AggregationNode::_serialize_with_serialized_key_result(RuntimeState* stat ColumnsWithTypeAndName columns_with_schema; for (int i = 0; i < key_size; ++i) { columns_with_schema.emplace_back(std::move(key_columns[i]), - _probe_expr_ctxs[i]->root()->data_type(), _probe_expr_ctxs[i]->root()->expr_name()); + _probe_expr_ctxs[i]->root()->data_type(), + _probe_expr_ctxs[i]->root()->expr_name()); } for (int i = 0; i < agg_size; ++i) { columns_with_schema.emplace_back(std::move(value_columns[i]), value_data_types[i], ""); @@ -1081,9 +1083,10 @@ void AggregationNode::_update_memusage_with_serialized_key() { std::visit( [&](auto&& agg_method) -> void { auto& data = agg_method.data; - _data_mem_tracker->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); + _data_mem_tracker->consume(_agg_arena_pool.size() - + _mem_usage_record.used_in_arena); _data_mem_tracker->consume(data.get_buffer_size_in_bytes() - - _mem_usage_record.used_in_state); + _mem_usage_record.used_in_state); _mem_usage_record.used_in_state = data.get_buffer_size_in_bytes(); _mem_usage_record.used_in_arena = _agg_arena_pool.size(); }, @@ -1106,7 +1109,7 @@ void AggregationNode::_close_with_serialized_key() { } void AggregationNode::release_tracker() { - _data_mem_tracker->release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); + _data_mem_tracker->release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); } } // namespace doris::vectorized diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index 6861421e12..b4fe98ec86 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -38,7 +38,8 @@ VAnalyticEvalNode::VAnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, } _fn_scope = AnalyticFnScope::PARTITION; - if (!tnode.analytic_node.__isset.window) { //haven't set window, Unbounded: [unbounded preceding,unbounded following] + if (!tnode.analytic_node.__isset + .window) { //haven't set window, Unbounded: [unbounded preceding,unbounded following] _executor.get_next = std::bind(&VAnalyticEvalNode::_get_next_for_partition, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); @@ -49,7 +50,8 @@ VAnalyticEvalNode::VAnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, _window.window_end.type == TAnalyticWindowBoundaryType::CURRENT_ROW) << "RANGE window end bound must be CURRENT ROW or UNBOUNDED FOLLOWING"; - if (!_window.__isset.window_end) { //haven't set end, so same as PARTITION, [unbounded preceding, unbounded following] + if (!_window.__isset + .window_end) { //haven't set end, so same as PARTITION, [unbounded preceding, unbounded following] _executor.get_next = std::bind(&VAnalyticEvalNode::_get_next_for_partition, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); @@ -69,7 +71,7 @@ VAnalyticEvalNode::VAnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, } else { if (_window.__isset.window_start) { //calculate start boundary TAnalyticWindowBoundary b = _window.window_start; - if (b.__isset.rows_offset_value) { //[offset , ] + if (b.__isset.rows_offset_value) { //[offset , ] _rows_start_offset = b.rows_offset_value; if (b.type == TAnalyticWindowBoundaryType::PRECEDING) { _rows_start_offset *= -1; //preceding--> negative @@ -82,7 +84,7 @@ VAnalyticEvalNode::VAnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, if (_window.__isset.window_end) { //calculate end boundary TAnalyticWindowBoundary b = _window.window_end; - if (b.__isset.rows_offset_value) { //[ , offset] + if (b.__isset.rows_offset_value) { //[ , offset] _rows_end_offset = b.rows_offset_value; if (b.type == TAnalyticWindowBoundaryType::PRECEDING) { _rows_end_offset *= -1; @@ -124,15 +126,18 @@ Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { } AggFnEvaluator* evaluator = nullptr; - RETURN_IF_ERROR(AggFnEvaluator::create(_pool, analytic_node.analytic_functions[i], &evaluator)); + RETURN_IF_ERROR( + AggFnEvaluator::create(_pool, analytic_node.analytic_functions[i], &evaluator)); _agg_functions.emplace_back(evaluator); for (size_t j = 0; j < _agg_expr_ctxs[i].size(); ++j) { _agg_intput_columns[i][j] = _agg_expr_ctxs[i][j]->root()->data_type()->create_column(); } } - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.partition_exprs, &_partition_by_eq_expr_ctxs)); - RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.order_by_exprs, &_order_by_eq_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.partition_exprs, + &_partition_by_eq_expr_ctxs)); + RETURN_IF_ERROR( + VExpr::create_expr_trees(_pool, analytic_node.order_by_exprs, &_order_by_eq_expr_ctxs)); _partition_by_column_idxs.resize(_partition_by_eq_expr_ctxs.size()); _ordey_by_column_idxs.resize(_order_by_eq_expr_ctxs.size()); _agg_functions_size = _agg_functions.size(); @@ -255,7 +260,9 @@ Status VAnalyticEvalNode::_get_next_for_partition(RuntimeState* state, Block* bl while (!_input_eos || _output_block_index < _input_blocks.size()) { bool next_partition = false; RETURN_IF_ERROR(_consumed_block_and_init_partition(state, &next_partition, eos)); - if (*eos) { break; } + if (*eos) { + break; + } size_t current_block_rows = _input_blocks[_output_block_index].rows(); if (next_partition) { @@ -274,10 +281,13 @@ Status VAnalyticEvalNode::_get_next_for_range(RuntimeState* state, Block* block, while (!_input_eos || _output_block_index < _input_blocks.size()) { bool next_partition = false; RETURN_IF_ERROR(_consumed_block_and_init_partition(state, &next_partition, eos)); - if (*eos) { break; } + if (*eos) { + break; + } size_t current_block_rows = _input_blocks[_output_block_index].rows(); - while (_current_row_position < _partition_by_end.pos && _window_end_position < current_block_rows) { + while (_current_row_position < _partition_by_end.pos && + _window_end_position < current_block_rows) { if (_current_row_position >= _order_by_end.pos) { _update_order_by_range(); _executor.execute(_order_by_start, _order_by_end, _order_by_start, _order_by_end); @@ -295,18 +305,25 @@ Status VAnalyticEvalNode::_get_next_for_rows(RuntimeState* state, Block* block, while (!_input_eos || _output_block_index < _input_blocks.size()) { bool next_partition = false; RETURN_IF_ERROR(_consumed_block_and_init_partition(state, &next_partition, eos)); - if (*eos) { break; } + if (*eos) { + break; + } size_t current_block_rows = _input_blocks[_output_block_index].rows(); - while (_current_row_position < _partition_by_end.pos && _window_end_position < current_block_rows) { + while (_current_row_position < _partition_by_end.pos && + _window_end_position < current_block_rows) { BlockRowPos range_start, range_end; if (!_window.__isset.window_start && - _window.window_end.type == TAnalyticWindowBoundaryType::CURRENT_ROW) { //[preceding, current_row],[current_row, following] + _window.window_end.type == + TAnalyticWindowBoundaryType:: + CURRENT_ROW) { //[preceding, current_row],[current_row, following] range_start.pos = _current_row_position; - range_end.pos = _current_row_position + 1; //going on calculate,add up data, no need to reset state + range_end.pos = _current_row_position + + 1; //going on calculate,add up data, no need to reset state } else { _reset_agg_status(); - if (!_window.__isset.window_start) { //[preceding, offset] --unbound: [preceding, following] + if (!_window.__isset + .window_start) { //[preceding, offset] --unbound: [preceding, following] range_start.pos = _partition_by_start.pos; } else { range_start.pos = _current_row_position + _rows_start_offset; @@ -326,7 +343,8 @@ Status VAnalyticEvalNode::_get_next_for_rows(RuntimeState* state, Block* block, Status VAnalyticEvalNode::_consumed_block_and_init_partition(RuntimeState* state, bool* next_partition, bool* eos) { BlockRowPos found_partition_end = _get_partition_by_end(); //claculate current partition end - while (whether_need_next_partition(found_partition_end)) { //check whether need get next partition, if current partition haven't execute done, return false + while (whether_need_next_partition( + found_partition_end)) { //check whether need get next partition, if current partition haven't execute done, return false RETURN_IF_ERROR(_fetch_next_block_data(state)); //return true, fetch next block found_partition_end = _get_partition_by_end(); //claculate new partition end } @@ -343,18 +361,21 @@ Status VAnalyticEvalNode::_consumed_block_and_init_partition(RuntimeState* state BlockRowPos VAnalyticEvalNode::_get_partition_by_end() { SCOPED_TIMER(_evaluation_timer); - if (_current_row_position < _partition_by_end.pos) { //still have data, return partition_by_end directly + if (_current_row_position < + _partition_by_end.pos) { //still have data, return partition_by_end directly return _partition_by_end; } - if (_partition_by_eq_expr_ctxs.empty() || (_input_total_rows == 0)) { //no partition_by, the all block is end + if (_partition_by_eq_expr_ctxs.empty() || + (_input_total_rows == 0)) { //no partition_by, the all block is end return _all_block_end; } BlockRowPos cal_end = _all_block_end; - for (size_t i = 0; i < _partition_by_eq_expr_ctxs.size(); ++i) { //have partition_by, binary search the partiton end - cal_end = _compare_row_to_find_end(_partition_by_column_idxs[i], _partition_by_end, - cal_end); + for (size_t i = 0; i < _partition_by_eq_expr_ctxs.size(); + ++i) { //have partition_by, binary search the partiton end + cal_end = + _compare_row_to_find_end(_partition_by_column_idxs[i], _partition_by_end, cal_end); } cal_end.pos = input_block_first_row_positions[cal_end.block_num] + cal_end.row_num; return cal_end; @@ -386,7 +407,8 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta if (end_block_num == mid_blcok_num - 1) { start_next_block_column = _input_blocks[end_block_num].get_by_position(idx).column; int64_t block_size = _input_blocks[end_block_num].rows(); - if ((start_column->compare_at(start_init_row_num, block_size - 1, *start_next_block_column, 1) == 0)) { + if ((start_column->compare_at(start_init_row_num, block_size - 1, *start_next_block_column, + 1) == 0)) { start.block_num = end_block_num + 1; start.row_num = 0; return start; @@ -451,13 +473,15 @@ Status VAnalyticEvalNode::_fetch_next_block_data(RuntimeState* state) { _all_block_end.row_num = block_rows; _all_block_end.pos = _input_total_rows; - if (_origin_cols.empty()) { //record origin columns, maybe be after this, could cast some column but no need to save + if (_origin_cols + .empty()) { //record origin columns, maybe be after this, could cast some column but no need to save for (int c = 0; c < block.columns(); ++c) { _origin_cols.emplace_back(c); } } - for (size_t i = 0; i < _agg_functions_size; ++i) { //insert _agg_intput_columns, execute calculate for its + for (size_t i = 0; i < _agg_functions_size; + ++i) { //insert _agg_intput_columns, execute calculate for its for (size_t j = 0; j < _agg_expr_ctxs[i].size(); ++j) { RETURN_IF_ERROR(_insert_range_column(&block, _agg_expr_ctxs[i][j], _agg_intput_columns[i][j].get(), block_rows)); diff --git a/be/src/vec/exec/vassert_num_rows_node.cpp b/be/src/vec/exec/vassert_num_rows_node.cpp index d802ba12e0..0614b56631 100644 --- a/be/src/vec/exec/vassert_num_rows_node.cpp +++ b/be/src/vec/exec/vassert_num_rows_node.cpp @@ -27,7 +27,7 @@ namespace doris::vectorized { VAssertNumRowsNode::VAssertNumRowsNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) + const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), _desired_num_rows(tnode.assert_num_rows_node.desired_num_rows), _subquery_string(tnode.assert_num_rows_node.subquery_string) { @@ -96,4 +96,4 @@ Status VAssertNumRowsNode::get_next(RuntimeState* state, Block* block, bool* eos return Status::OK(); } -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exec/vblocking_join_node.cpp b/be/src/vec/exec/vblocking_join_node.cpp index acfec89b26..05140888ae 100644 --- a/be/src/vec/exec/vblocking_join_node.cpp +++ b/be/src/vec/exec/vblocking_join_node.cpp @@ -27,9 +27,11 @@ namespace doris::vectorized { VBlockingJoinNode::VBlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, - ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), _node_name(node_name), _join_op(join_op), + ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + _node_name(node_name), + _join_op(join_op), _left_side_eos(false) {} Status VBlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { @@ -91,7 +93,8 @@ Status VBlockingJoinNode::open(RuntimeState* state) { std::promise build_side_status; add_runtime_exec_option("Join Build-Side Prepared Asynchronously"); - std::thread(bind(&VBlockingJoinNode::build_side_thread, this, state, &build_side_status)).detach(); + std::thread(bind(&VBlockingJoinNode::build_side_thread, this, state, &build_side_status)) + .detach(); // Open the left child so that it may perform any initialisation in parallel. // Don't exit even if we see an error, we still need to wait for the build thread @@ -140,4 +143,4 @@ void VBlockingJoinNode::debug_string(int indentation_level, std::stringstream* o *out << ")"; } -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exec/vblocking_join_node.h b/be/src/vec/exec/vblocking_join_node.h index 80ddb5acf8..b6d2b7effd 100644 --- a/be/src/vec/exec/vblocking_join_node.h +++ b/be/src/vec/exec/vblocking_join_node.h @@ -36,34 +36,34 @@ namespace vectorized { // right child in open(). class VBlockingJoinNode : public doris::ExecNode { public: - VBlockingJoinNode(const std::string &node_name, const TJoinOp::type join_op, ObjectPool *pool, - const TPlanNode &tnode, const DescriptorTbl &descs); + VBlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, ObjectPool* pool, + const TPlanNode& tnode, const DescriptorTbl& descs); virtual ~VBlockingJoinNode() = default; // Subclasses should call VBlockingJoinNode::init() and then perform any other init() // work, e.g. creating expr trees. - virtual Status init(const TPlanNode &tnode, RuntimeState *state = nullptr); + virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); // Subclasses should call VBlockingJoinNode::prepare() and then perform any other // prepare() work, e.g. codegen. - virtual Status prepare(RuntimeState *state); + virtual Status prepare(RuntimeState* state); // Open prepares the build side structures (subclasses should implement // construct_build_side()) and then prepares for GetNext with the first left child row // (subclasses should implement init_get_next()). - virtual Status open(RuntimeState *state); + virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState *state, RowBatch *row_batch, bool *eos) { + virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { return Status::NotSupported("Not Implemented VBlocking Join Node::get_next scalar"); } - virtual Status close(RuntimeState *state); + virtual Status close(RuntimeState* state); private: const std::string _node_name; TJoinOp::type _join_op; - bool _eos; // if true, nothing left to return in get_next() + bool _eos; // if true, nothing left to return in get_next() std::unique_ptr _build_pool; // holds everything referenced from build side // _left_block must be cleared before calling get_next(). The child node @@ -82,10 +82,10 @@ private: // This should be the same size as the left child tuple row. int _result_tuple_row_size; - RuntimeProfile::Counter *_build_timer; // time to prepare build side - RuntimeProfile::Counter *_left_child_timer; // time to process left child batch - RuntimeProfile::Counter *_build_row_counter; // num build rows - RuntimeProfile::Counter *_left_child_row_counter; // num left child rows + RuntimeProfile::Counter* _build_timer; // time to prepare build side + RuntimeProfile::Counter* _left_child_timer; // time to process left child batch + RuntimeProfile::Counter* _build_row_counter; // num build rows + RuntimeProfile::Counter* _left_child_row_counter; // num left child rows // Init the build-side state for a new left child row (e.g. hash table iterator or list // iterator) given the first row. Used in open() to prepare for get_next(). @@ -96,34 +96,34 @@ private: // left child. If, for example, the left child is another // join node, it can start to build its own build-side at the // same time. - virtual Status construct_build_side(RuntimeState *state) = 0; + virtual Status construct_build_side(RuntimeState* state) = 0; // Gives subclasses an opportunity to add debug output to the debug string printed by // debug_string(). - virtual void add_to_debug_string(int indentation_level, std::stringstream *out) const {} + virtual void add_to_debug_string(int indentation_level, std::stringstream* out) const {} // Subclasses should not override, use add_to_debug_string() to add to the result. - virtual void debug_string(int indentation_level, std::stringstream *out) const; + virtual void debug_string(int indentation_level, std::stringstream* out) const; -// // Returns a debug string for the left child's 'row'. They have tuple ptrs that are -// // uninitialized; the left child only populates the tuple ptrs it is responsible -// // for. This function outputs just the row values and leaves the build -// // side values as NULL. -// // This is only used for debugging and outputting the left child rows before -// // doing the join. -// std::string get_left_child_row_string(TupleRow *row); -// -// // Write combined row, consisting of the left child's 'left_row' and right child's -// // 'build_row' to 'out_row'. -// // This is replaced by codegen. -// void create_output_row(TupleRow *out_row, TupleRow *left_row, TupleRow *build_row); -// + // // Returns a debug string for the left child's 'row'. They have tuple ptrs that are + // // uninitialized; the left child only populates the tuple ptrs it is responsible + // // for. This function outputs just the row values and leaves the build + // // side values as NULL. + // // This is only used for debugging and outputting the left child rows before + // // doing the join. + // std::string get_left_child_row_string(TupleRow *row); + // + // // Write combined row, consisting of the left child's 'left_row' and right child's + // // 'build_row' to 'out_row'. + // // This is replaced by codegen. + // void create_output_row(TupleRow *out_row, TupleRow *left_row, TupleRow *build_row); + // friend class VCrossJoinNode; private: // Supervises ConstructBuildSide in a separate thread, and returns its status in the // promise parameter. - void build_side_thread(RuntimeState *state, std::promise *status); + void build_side_thread(RuntimeState* state, std::promise* status); }; } // namespace vectorized diff --git a/be/src/vec/exec/vbroker_scan_node.cpp b/be/src/vec/exec/vbroker_scan_node.cpp index e183cdd06c..c1d597824c 100644 --- a/be/src/vec/exec/vbroker_scan_node.cpp +++ b/be/src/vec/exec/vbroker_scan_node.cpp @@ -114,8 +114,7 @@ Status VBrokerScanNode::close(RuntimeState* state) { return status; } -Status VBrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, - ScannerCounter* counter) { +Status VBrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, ScannerCounter* counter) { //create scanner object and open std::unique_ptr scanner = create_scanner(scan_range, counter); RETURN_IF_ERROR(scanner->open()); @@ -225,4 +224,4 @@ void VBrokerScanNode::scanner_worker(int start_idx, int length) { } } -} \ No newline at end of file +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exec/vbroker_scan_node.h b/be/src/vec/exec/vbroker_scan_node.h index 1a1b8eb4e0..ee7f763815 100644 --- a/be/src/vec/exec/vbroker_scan_node.h +++ b/be/src/vec/exec/vbroker_scan_node.h @@ -43,8 +43,7 @@ private: void scanner_worker(int start_idx, int length); // Scan one range - Status scanner_scan(const TBrokerScanRange& scan_range, - ScannerCounter* counter); + Status scanner_scan(const TBrokerScanRange& scan_range, ScannerCounter* counter); std::deque> _block_queue; }; diff --git a/be/src/vec/exec/vbroker_scanner.cpp b/be/src/vec/exec/vbroker_scanner.cpp index 3a79881858..28e2f24c22 100644 --- a/be/src/vec/exec/vbroker_scanner.cpp +++ b/be/src/vec/exec/vbroker_scanner.cpp @@ -27,14 +27,12 @@ namespace doris::vectorized { VBrokerScanner::VBrokerScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, - ScannerCounter* counter) - : BrokerScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter) { - -} + const TBrokerScanRangeParams& params, + const std::vector& ranges, + const std::vector& broker_addresses, + const std::vector& pre_filter_texprs, ScannerCounter* counter) + : BrokerScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, + counter) {} Status VBrokerScanner::get_next(std::vector& columns, bool* eof) { SCOPED_TIMER(_read_timer); @@ -168,14 +166,12 @@ Status VBrokerScanner::_fill_dest_columns(std::vector& columns _success = false; return Status::OK(); } - auto* nullable_column = - reinterpret_cast(column_ptr); + auto* nullable_column = reinterpret_cast(column_ptr); nullable_column->insert_data(nullptr, 0); continue; } if (slot_desc->is_nullable()) { - auto* nullable_column = - reinterpret_cast(column_ptr); + auto* nullable_column = reinterpret_cast(column_ptr); nullable_column->get_null_map_data().push_back(0); column_ptr = &nullable_column->get_nested_column(); } diff --git a/be/src/vec/exec/vbroker_scanner.h b/be/src/vec/exec/vbroker_scanner.h index 89d077168f..46128100ab 100644 --- a/be/src/vec/exec/vbroker_scanner.h +++ b/be/src/vec/exec/vbroker_scanner.h @@ -19,14 +19,14 @@ #include - namespace doris::vectorized { class VBrokerScanner final : public BrokerScanner { public: VBrokerScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter); + const TBrokerScanRangeParams& params, + const std::vector& ranges, + const std::vector& broker_addresses, + const std::vector& pre_filter_texprs, ScannerCounter* counter); ~VBrokerScanner() override = default; Status get_next(std::vector& columns, bool* eof) override; @@ -34,6 +34,5 @@ public: private: Status _convert_one_row(const Slice& line, std::vector& columns); Status _fill_dest_columns(std::vector& columns); - }; } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exec/vcross_join_node.cpp b/be/src/vec/exec/vcross_join_node.cpp index 01d96d69de..27740d9f8e 100644 --- a/be/src/vec/exec/vcross_join_node.cpp +++ b/be/src/vec/exec/vcross_join_node.cpp @@ -34,7 +34,8 @@ Status VCrossJoinNode::prepare(RuntimeState* state) { DCHECK(_join_op == TJoinOp::CROSS_JOIN); RETURN_IF_ERROR(VBlockingJoinNode::prepare(state)); SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker()); - _block_mem_tracker = MemTracker::create_virtual_tracker(-1, "VCrossJoinNode:Block", mem_tracker()); + _block_mem_tracker = + MemTracker::create_virtual_tracker(-1, "VCrossJoinNode:Block", mem_tracker()); _num_existing_columns = child(0)->row_desc().num_materialized_slots(); _num_columns_to_add = child(1)->row_desc().num_materialized_slots(); @@ -54,7 +55,8 @@ Status VCrossJoinNode::close(RuntimeState* state) { Status VCrossJoinNode::construct_build_side(RuntimeState* state) { // Do a full scan of child(1) and store all build row batches. RETURN_IF_ERROR(child(1)->open(state)); - SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB("Vec Cross join, while getting next from the child 1"); + SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB( + "Vec Cross join, while getting next from the child 1"); bool eos = false; while (true) { diff --git a/be/src/vec/exec/vempty_set_node.cpp b/be/src/vec/exec/vempty_set_node.cpp index 1d33f94517..620ea0e960 100644 --- a/be/src/vec/exec/vempty_set_node.cpp +++ b/be/src/vec/exec/vempty_set_node.cpp @@ -19,12 +19,12 @@ namespace doris { namespace vectorized { - VEmptySetNode::VEmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) +VEmptySetNode::VEmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs) {} - Status VEmptySetNode::get_next(RuntimeState* state, Block* block, bool* eos) { - *eos = true; - return Status::OK(); - } +Status VEmptySetNode::get_next(RuntimeState* state, Block* block, bool* eos) { + *eos = true; + return Status::OK(); +} } // namespace vectorized } // namespace doris diff --git a/be/src/vec/exec/vexcept_node.cpp b/be/src/vec/exec/vexcept_node.cpp index 4240743704..9176a75c8a 100644 --- a/be/src/vec/exec/vexcept_node.cpp +++ b/be/src/vec/exec/vexcept_node.cpp @@ -63,7 +63,6 @@ Status VExceptNode::open(RuntimeState* state) { [&](auto&& arg) { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - HashTableProbe process_hashtable_ctx( this, state->batch_size(), _probe_rows); st = process_hashtable_ctx.mark_data_in_hashtable(arg); @@ -87,7 +86,6 @@ Status VExceptNode::get_next(RuntimeState* state, Block* output_block, bool* eos [&](auto&& arg) { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - HashTableProbe process_hashtable_ctx( this, state->batch_size(), _probe_rows); st = process_hashtable_ctx.get_data_in_hashtable(arg, _mutable_cols, @@ -98,7 +96,8 @@ Status VExceptNode::get_next(RuntimeState* state, Block* output_block, bool* eos }, _hash_table_variants); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR( + VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); reached_limit(output_block, eos); return st; diff --git a/be/src/vec/exec/vintersect_node.cpp b/be/src/vec/exec/vintersect_node.cpp index 65ee121b26..7b8263ca9e 100644 --- a/be/src/vec/exec/vintersect_node.cpp +++ b/be/src/vec/exec/vintersect_node.cpp @@ -45,7 +45,7 @@ Status VIntersectNode::open(RuntimeState* state) { RETURN_IF_ERROR(VSetOperationNode::open(state)); bool eos = false; Status st; - + for (int i = 1; i < _children.size(); ++i) { if (i > 1) { refresh_hash_table(); @@ -98,7 +98,8 @@ Status VIntersectNode::get_next(RuntimeState* state, Block* output_block, bool* }, _hash_table_variants); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); + RETURN_IF_ERROR( + VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns())); reached_limit(output_block, eos); return st; diff --git a/be/src/vec/exec/vodbc_scan_node.cpp b/be/src/vec/exec/vodbc_scan_node.cpp index dc685c57cc..938c226fa6 100644 --- a/be/src/vec/exec/vodbc_scan_node.cpp +++ b/be/src/vec/exec/vodbc_scan_node.cpp @@ -119,7 +119,6 @@ Status VOdbcScanNode::get_next(RuntimeState* state, Block* block, bool* eos) { VLOG_ROW << "VOdbcScanNode output rows: " << block->rows(); } while (block->rows() == 0 && !(*eos)); - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns())); reached_limit(block, eos); diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h index 8c1ccca99e..f0a0221ca9 100644 --- a/be/src/vec/exec/volap_scanner.h +++ b/be/src/vec/exec/volap_scanner.h @@ -34,7 +34,7 @@ class VOlapScanner : public OlapScanner { public: VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation, bool need_agg_finalize, const TPaloScanRange& scan_range, - std::shared_ptr tracker); + std::shared_ptr tracker); Status get_block(RuntimeState* state, vectorized::Block* block, bool* eof); diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp index dfd2c08dcf..c1aed663a1 100644 --- a/be/src/vec/exec/vschema_scan_node.cpp +++ b/be/src/vec/exec/vschema_scan_node.cpp @@ -28,8 +28,11 @@ #include "util/types.h" namespace doris::vectorized { -VSchemaScanNode::VSchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : SchemaScanNode(pool, tnode, descs), _src_single_tuple(nullptr), _dest_single_tuple(nullptr) {} +VSchemaScanNode::VSchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs) + : SchemaScanNode(pool, tnode, descs), + _src_single_tuple(nullptr), + _dest_single_tuple(nullptr) {} VSchemaScanNode::~VSchemaScanNode() { delete[] reinterpret_cast(_src_single_tuple); @@ -42,12 +45,14 @@ VSchemaScanNode::~VSchemaScanNode() { Status VSchemaScanNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(SchemaScanNode::prepare(state)); - _src_single_tuple = reinterpret_cast(new (std::nothrow) char[_src_tuple_desc->byte_size()]); + _src_single_tuple = + reinterpret_cast(new (std::nothrow) char[_src_tuple_desc->byte_size()]); if (NULL == _src_single_tuple) { return Status::InternalError("new src single tuple failed."); } - _dest_single_tuple = reinterpret_cast(new (std::nothrow) char[_dest_tuple_desc->byte_size()]); + _dest_single_tuple = + reinterpret_cast(new (std::nothrow) char[_dest_tuple_desc->byte_size()]); if (NULL == _dest_single_tuple) { return Status::InternalError("new desc single tuple failed."); } @@ -81,7 +86,8 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, RETURN_IF_CANCELLED(state); // get all slots from schema table. - RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_single_tuple, _tuple_pool.get(), &schema_eos)); + RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_single_tuple, _tuple_pool.get(), + &schema_eos)); if (schema_eos) { *eos = true; @@ -108,8 +114,9 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, return Status::InternalError(ss.str()); } } else { - RETURN_IF_ERROR( - write_slot_to_vectorized_column(_dest_single_tuple->get_slot(slot_desc->tuple_offset()), slot_desc, &columns[i])); + RETURN_IF_ERROR(write_slot_to_vectorized_column( + _dest_single_tuple->get_slot(slot_desc->tuple_offset()), slot_desc, + &columns[i])); } } if (columns[0]->size() == state->batch_size()) { @@ -127,7 +134,8 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, } else { columns.clear(); } - RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, _dest_tuple_desc->slots().size())); + RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, + _dest_tuple_desc->slots().size())); VLOG_ROW << "VSchemaScanNode output rows: " << block->rows(); } } while (block->rows() == 0 && !(*eos)); @@ -136,8 +144,7 @@ Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, return Status::OK(); } -Status VSchemaScanNode::write_slot_to_vectorized_column(void* slot, - SlotDescriptor* slot_desc, +Status VSchemaScanNode::write_slot_to_vectorized_column(void* slot, SlotDescriptor* slot_desc, vectorized::MutableColumnPtr* column_ptr) { vectorized::IColumn* col_ptr = column_ptr->get(); if (slot_desc->is_nullable()) { @@ -146,99 +153,103 @@ Status VSchemaScanNode::write_slot_to_vectorized_column(void* slot, col_ptr = &nullable_column->get_nested_column(); } switch (slot_desc->type().type) { - case TYPE_HLL: { - HyperLogLog* hll_slot = reinterpret_cast(slot); - reinterpret_cast(col_ptr)->get_data().emplace_back(*hll_slot); - break; - } - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_STRING: { - StringValue* str_slot = reinterpret_cast(slot); - reinterpret_cast(col_ptr)->insert_data(str_slot->ptr, str_slot->len); - break; - } + case TYPE_HLL: { + HyperLogLog* hll_slot = reinterpret_cast(slot); + reinterpret_cast(col_ptr)->get_data().emplace_back(*hll_slot); + break; + } + case TYPE_VARCHAR: + case TYPE_CHAR: + case TYPE_STRING: { + StringValue* str_slot = reinterpret_cast(slot); + reinterpret_cast(col_ptr)->insert_data(str_slot->ptr, + str_slot->len); + break; + } - case TYPE_BOOLEAN: { - uint8_t num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_BOOLEAN: { + uint8_t num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value(num); + break; + } - case TYPE_TINYINT: { - int8_t num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_TINYINT: { + int8_t num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value(num); + break; + } - case TYPE_SMALLINT: { - int16_t num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_SMALLINT: { + int16_t num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value(num); + break; + } - case TYPE_INT: { - int32_t num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_INT: { + int32_t num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value(num); + break; + } - case TYPE_BIGINT: { - int64_t num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_BIGINT: { + int64_t num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value(num); + break; + } - case TYPE_LARGEINT: { - __int128 num; - memcpy(&num, slot, sizeof(__int128)); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_LARGEINT: { + __int128 num; + memcpy(&num, slot, sizeof(__int128)); + reinterpret_cast*>(col_ptr)->insert_value(num); + break; + } - case TYPE_FLOAT: { - float num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_FLOAT: { + float num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value( + num); + break; + } - case TYPE_DOUBLE: { - double num = *reinterpret_cast(slot); - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_DOUBLE: { + double num = *reinterpret_cast(slot); + reinterpret_cast*>(col_ptr)->insert_value( + num); + break; + } - case TYPE_DATE: { - VecDateTimeValue value; - DateTimeValue* ts_slot = reinterpret_cast(slot); - value.convert_dt_to_vec_dt(ts_slot); - reinterpret_cast*>(col_ptr)->insert_data( + case TYPE_DATE: { + VecDateTimeValue value; + DateTimeValue* ts_slot = reinterpret_cast(slot); + value.convert_dt_to_vec_dt(ts_slot); + reinterpret_cast*>(col_ptr)->insert_data( reinterpret_cast(&value), 0); - break; - } + break; + } - case TYPE_DATETIME: { - VecDateTimeValue value; - DateTimeValue* ts_slot = reinterpret_cast(slot); - value.convert_dt_to_vec_dt(ts_slot); - reinterpret_cast*>(col_ptr)->insert_data( + case TYPE_DATETIME: { + VecDateTimeValue value; + DateTimeValue* ts_slot = reinterpret_cast(slot); + value.convert_dt_to_vec_dt(ts_slot); + reinterpret_cast*>(col_ptr)->insert_data( reinterpret_cast(&value), 0); - break; - } + break; + } - case TYPE_DECIMALV2: { - __int128 num = (reinterpret_cast(slot))->value; - reinterpret_cast*>(col_ptr)->insert_value(num); - break; - } + case TYPE_DECIMALV2: { + __int128 num = (reinterpret_cast(slot))->value; + reinterpret_cast*>(col_ptr)->insert_value( + num); + break; + } - default: { - DCHECK(false) << "bad slot type: " << slot_desc->type(); - std::stringstream ss; - ss << "Fail to convert schema type:'" << slot_desc->type() << " on column:`" - << slot_desc->col_name() + "`"; - return Status::InternalError(ss.str()); - } + default: { + DCHECK(false) << "bad slot type: " << slot_desc->type(); + std::stringstream ss; + ss << "Fail to convert schema type:'" << slot_desc->type() << " on column:`" + << slot_desc->col_name() + "`"; + return Status::InternalError(ss.str()); + } } return Status::OK(); @@ -256,8 +267,10 @@ void VSchemaScanNode::project_tuple() { if (_src_single_tuple->is_null(_src_tuple_desc->slots()[j]->null_indicator_offset())) { _dest_single_tuple->set_null(_dest_tuple_desc->slots()[i]->null_indicator_offset()); } else { - void* dest_slot = _dest_single_tuple->get_slot(_dest_tuple_desc->slots()[i]->tuple_offset()); - void* src_slot = _src_single_tuple->get_slot(_src_tuple_desc->slots()[j]->tuple_offset()); + void* dest_slot = + _dest_single_tuple->get_slot(_dest_tuple_desc->slots()[i]->tuple_offset()); + void* src_slot = + _src_single_tuple->get_slot(_src_tuple_desc->slots()[j]->tuple_offset()); int slot_size = _src_tuple_desc->slots()[j]->type().get_slot_size(); memcpy(dest_slot, src_slot, slot_size); } diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index 14cb8dec65..8d27120ed1 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -37,7 +37,7 @@ struct HashTableBuild { using KeyGetter = typename HashTableContext::State; using Mapped = typename HashTableContext::Mapped; int64_t old_bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); - + Defer defer {[&]() { int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); _operation_node->_hash_table_mem_tracker->consume(bucket_bytes - old_bucket_bytes); @@ -231,7 +231,7 @@ void VSetOperationNode::hash_table_init() { Status VSetOperationNode::hash_table_build(RuntimeState* state) { RETURN_IF_ERROR(child(0)->open(state)); SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB( - "Vec Set Operation Node, while constructing the hash table"); + "Vec Set Operation Node, while constructing the hash table"); Block block; MutableBlock mutable_block(child(0)->row_desc().tuple_descriptors()); @@ -248,12 +248,14 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) { _hash_table_mem_tracker->consume(allocated_bytes); _mem_used += allocated_bytes; - if (block.rows() != 0) { mutable_block.merge(block); } + if (block.rows() != 0) { + mutable_block.merge(block); + } // make one block for each 4 gigabytes - constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL; + constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL; if (_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE) { - _build_blocks.emplace_back(mutable_block.to_block()); + _build_blocks.emplace_back(mutable_block.to_block()); // TODO:: Rethink may we should do the proess after we recevie all build blocks ? // which is better. RETURN_IF_ERROR(process_build_block(_build_blocks[index], index)); @@ -282,8 +284,8 @@ Status VSetOperationNode::process_build_block(Block& block, uint8_t offset) { [&](auto&& arg) { using HashTableCtxType = std::decay_t; if constexpr (!std::is_same_v) { - HashTableBuild hash_table_build_process(rows, block, - raw_ptrs, this, offset); + HashTableBuild hash_table_build_process(rows, block, raw_ptrs, + this, offset); hash_table_build_process(arg); } else { LOG(FATAL) << "FATAL: uninited hash table"; @@ -319,7 +321,7 @@ Status VSetOperationNode::extract_build_column(Block& block, ColumnRawPtrs& raw_ RETURN_IF_ERROR(_child_expr_lists[0][i]->execute(&block, &result_col_id)); block.get_by_position(result_col_id).column = - block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); + block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); auto column = block.get_by_position(result_col_id).column.get(); if (auto* nullable = check_and_get_column(*column)) { @@ -349,7 +351,7 @@ Status VSetOperationNode::extract_probe_column(Block& block, ColumnRawPtrs& raw_ RETURN_IF_ERROR(_child_expr_lists[child_id][i]->execute(&block, &result_col_id)); block.get_by_position(result_col_id).column = - block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); + block.get_by_position(result_col_id).column->convert_to_full_column_if_const(); auto column = block.get_by_position(result_col_id).column.get(); if (auto* nullable = check_and_get_column(*column)) { diff --git a/be/src/vec/exec/vsort_exec_exprs.cpp b/be/src/vec/exec/vsort_exec_exprs.cpp index a125424a65..5813e5121f 100644 --- a/be/src/vec/exec/vsort_exec_exprs.cpp +++ b/be/src/vec/exec/vsort_exec_exprs.cpp @@ -30,8 +30,8 @@ Status VSortExecExprs::init(const std::vector& ordering_exprs, RETURN_IF_ERROR(VExpr::create_expr_trees(pool, ordering_exprs, &_lhs_ordering_expr_ctxs)); if (sort_tuple_slot_exprs != NULL) { _materialize_tuple = true; - RETURN_IF_ERROR( - VExpr::create_expr_trees(pool, *sort_tuple_slot_exprs, &_sort_tuple_slot_expr_ctxs)); + RETURN_IF_ERROR(VExpr::create_expr_trees(pool, *sort_tuple_slot_exprs, + &_sort_tuple_slot_expr_ctxs)); } else { _materialize_tuple = false; } @@ -49,8 +49,8 @@ Status VSortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_r const RowDescriptor& output_row_desc, const std::shared_ptr& expr_mem_tracker) { if (_materialize_tuple) { - RETURN_IF_ERROR( - VExpr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker)); + RETURN_IF_ERROR(VExpr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc, + expr_mem_tracker)); } RETURN_IF_ERROR( VExpr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc, expr_mem_tracker)); @@ -75,5 +75,4 @@ void VSortExecExprs::close(RuntimeState* state) { VExpr::close(_rhs_ordering_expr_ctxs, state); } -} //namespace doris - +} // namespace doris::vectorized diff --git a/be/src/vec/exec/vsort_exec_exprs.h b/be/src/vec/exec/vsort_exec_exprs.h index fd81be2bed..6ed00f7d8e 100644 --- a/be/src/vec/exec/vsort_exec_exprs.h +++ b/be/src/vec/exec/vsort_exec_exprs.h @@ -35,47 +35,45 @@ namespace vectorized { class VSortExecExprs { public: // Initialize the expressions from a TSortInfo using the specified pool. - Status init(const TSortInfo &sort_info, ObjectPool *pool); + Status init(const TSortInfo& sort_info, ObjectPool* pool); // Initialize the ordering and (optionally) materialization expressions from the thrift // TExprs into the specified pool. sort_tuple_slot_exprs is NULL if the tuple is not // materialized. - Status init(const std::vector &ordering_exprs, - const std::vector *sort_tuple_slot_exprs, ObjectPool *pool); + Status init(const std::vector& ordering_exprs, + const std::vector* sort_tuple_slot_exprs, ObjectPool* pool); // prepare all expressions used for sorting and tuple materialization. - Status prepare(RuntimeState *state, const RowDescriptor &child_row_desc, - const RowDescriptor &output_row_desc, - const std::shared_ptr &mem_tracker); + Status prepare(RuntimeState* state, const RowDescriptor& child_row_desc, + const RowDescriptor& output_row_desc, + const std::shared_ptr& mem_tracker); // open all expressions used for sorting and tuple materialization. - Status open(RuntimeState *state); + Status open(RuntimeState* state); // close all expressions used for sorting and tuple materialization. - void close(RuntimeState *state); + void close(RuntimeState* state); - const std::vector &sort_tuple_slot_expr_ctxs() const { + const std::vector& sort_tuple_slot_expr_ctxs() const { return _sort_tuple_slot_expr_ctxs; } // Can only be used after calling prepare() - const std::vector &lhs_ordering_expr_ctxs() const { + const std::vector& lhs_ordering_expr_ctxs() const { return _lhs_ordering_expr_ctxs; } // Can only be used after calling open() - const std::vector &rhs_ordering_expr_ctxs() const { + const std::vector& rhs_ordering_expr_ctxs() const { return _rhs_ordering_expr_ctxs; } - bool need_materialize_tuple() const { - return _materialize_tuple; - } + bool need_materialize_tuple() const { return _materialize_tuple; } private: // Create two VExprContexts for evaluating over the TupleRows. - std::vector _lhs_ordering_expr_ctxs; - std::vector _rhs_ordering_expr_ctxs; + std::vector _lhs_ordering_expr_ctxs; + std::vector _rhs_ordering_expr_ctxs; // If true, the tuples to be sorted are materialized by // _sort_tuple_slot_exprs before the actual sort is performed. @@ -84,15 +82,14 @@ private: // Expressions used to materialize slots in the tuples to be sorted. // One expr per slot in the materialized tuple. Valid only if // _materialize_tuple is true. - std::vector _sort_tuple_slot_expr_ctxs; + std::vector _sort_tuple_slot_expr_ctxs; // Initialize directly from already-created VExprContexts. Callers should manually call // Prepare(), Open(), and Close() on input VExprContexts (instead of calling the // analogous functions in this class). Used for testing. - Status init(const std::vector &lhs_ordering_expr_ctxs, - const std::vector &rhs_ordering_expr_ctxs); + Status init(const std::vector& lhs_ordering_expr_ctxs, + const std::vector& rhs_ordering_expr_ctxs); }; -} // namepace vectorized +} // namespace vectorized } // namespace doris - diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp index 8e963262fe..29e2a06d13 100644 --- a/be/src/vec/exec/vsort_node.cpp +++ b/be/src/vec/exec/vsort_node.cpp @@ -66,7 +66,7 @@ Status VSortNode::open(RuntimeState* state) { // Unless we are inside a subplan expecting to call open()/get_next() on the child // again, the child can be closed at this point. // if (!IsInSubplan()) { -// child(0)->close(state); + // child(0)->close(state); // } return Status::OK(); } @@ -135,7 +135,7 @@ Status VSortNode::sort_input(RuntimeState* state) { size_t mem_usage = block.allocated_bytes(); // dispose TOP-N logic - if (_limit != -1 ) { + if (_limit != -1) { // Here is a little opt to reduce the mem uasge, we build a max heap // to order the block in _block_priority_queue. // if one block totally greater the heap top of _block_priority_queue @@ -144,8 +144,8 @@ Status VSortNode::sort_input(RuntimeState* state) { _total_mem_usage += mem_usage; _sorted_blocks.emplace_back(std::move(block)); _num_rows_in_block += rows; - _block_priority_queue.emplace( - _pool->add(new SortCursorImpl(_sorted_blocks.back(), _sort_description))); + _block_priority_queue.emplace(_pool->add( + new SortCursorImpl(_sorted_blocks.back(), _sort_description))); } else { SortBlockCursor block_cursor( _pool->add(new SortCursorImpl(block, _sort_description))); @@ -204,17 +204,17 @@ Status VSortNode::pretreat_block(doris::vectorized::Block& block) { } void VSortNode::build_merge_tree() { - for (const auto &block : _sorted_blocks) { + for (const auto& block : _sorted_blocks) { _cursors.emplace_back(block, _sort_description); } if (_sorted_blocks.size() > 1) { - for (auto& _cursor : _cursors) - _priority_queue.push(SortCursor(&_cursor)); + for (auto& _cursor : _cursors) _priority_queue.push(SortCursor(&_cursor)); } } -Status VSortNode::merge_sort_read(doris::RuntimeState *state, doris::vectorized::Block *block, bool *eos) { +Status VSortNode::merge_sort_read(doris::RuntimeState* state, doris::vectorized::Block* block, + bool* eos) { size_t num_columns = _sorted_blocks[0].columns(); bool mem_reuse = block->mem_reuse(); @@ -240,8 +240,7 @@ Status VSortNode::merge_sort_read(doris::RuntimeState *state, doris::vectorized: _priority_queue.push(current); } - if (merged_rows == state->batch_size()) - break; + if (merged_rows == state->batch_size()) break; } if (merged_rows == 0) { @@ -257,4 +256,4 @@ Status VSortNode::merge_sort_read(doris::RuntimeState *state, doris::vectorized: return Status::OK(); } -} // end namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp index c05b3ef6a8..d1265726c4 100644 --- a/be/src/vec/exec/vunion_node.cpp +++ b/be/src/vec/exec/vunion_node.cpp @@ -125,8 +125,10 @@ Status VUnionNode::get_next_materialized(RuntimeState* state, Block* block) { DCHECK_LT(_child_idx, _children.size()); bool mem_reuse = block->mem_reuse(); - MutableBlock mblock = mem_reuse ? MutableBlock::build_mutable_block(block) : - MutableBlock(Block(VectorizedUtils::create_columns_with_type_and_name(row_desc()))); + MutableBlock mblock = + mem_reuse ? MutableBlock::build_mutable_block(block) + : MutableBlock(Block( + VectorizedUtils::create_columns_with_type_and_name(row_desc()))); Block child_block; while (has_more_materialized() && mblock.rows() <= state->batch_size()) { @@ -157,9 +159,9 @@ Status VUnionNode::get_next_materialized(RuntimeState* state, Block* block) { // Unless we are inside a subplan expecting to call open()/get_next() on the child // again, the child can be closed at this point. // TODO: Recheck whether is_in_subplan() is right -// if (!is_in_subplan()) { -// child(_child_idx)->close(state); -// } + // if (!is_in_subplan()) { + // child(_child_idx)->close(state); + // } ++_child_idx; } } @@ -177,12 +179,14 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) { DCHECK_LT(_const_expr_list_idx, _const_expr_lists.size()); bool mem_reuse = block->mem_reuse(); - MutableBlock mblock = mem_reuse ? MutableBlock::build_mutable_block(block) : - MutableBlock(Block(VectorizedUtils::create_columns_with_type_and_name(row_desc()))); + MutableBlock mblock = + mem_reuse ? MutableBlock::build_mutable_block(block) + : MutableBlock(Block( + VectorizedUtils::create_columns_with_type_and_name(row_desc()))); for (; _const_expr_list_idx < _const_expr_lists.size(); ++_const_expr_list_idx) { Block tmp_block; tmp_block.insert({vectorized::ColumnUInt8::create(1), - std::make_shared(), ""}); + std::make_shared(), ""}); int const_expr_lists_size = _const_expr_lists[_const_expr_list_idx].size(); std::vector result_list(const_expr_lists_size); for (size_t i = 0; i < const_expr_lists_size; ++i) { @@ -201,7 +205,7 @@ Status VUnionNode::get_next_const(RuntimeState* state, Block* block) { // need add one row to make sure the union node exec const expr return at least one row if (block->rows() == 0) { block->insert({vectorized::ColumnUInt8::create(1), - std::make_shared(), ""}); + std::make_shared(), ""}); } return Status::OK(); diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index a1a441b7b8..6c63fcc760 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -45,7 +45,8 @@ doris::Status VCastExpr::prepare(doris::RuntimeState* state, const doris::RowDes argument_template.emplace_back(std::move(child_column), child->data_type(), child_name); argument_template.emplace_back(_cast_param, _cast_param_data_type, _target_data_type_name); - _function = SimpleFunctionFactory::instance().get_function(function_name, argument_template, _data_type); + _function = SimpleFunctionFactory::instance().get_function(function_name, argument_template, + _data_type); if (_function == nullptr) { return Status::NotSupported( diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp index c7e6731897..0ee0d7cf16 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.cpp +++ b/be/src/vec/exprs/vectorized_agg_fn.cpp @@ -39,12 +39,12 @@ AggFnEvaluator::AggFnEvaluator(const TExprNode& desc) _exec_timer(nullptr), _merge_timer(nullptr), _expr_timer(nullptr) { - bool nullable = true; - if (desc.__isset.is_nullable) { - nullable = desc.is_nullable; - } - _data_type = DataTypeFactory::instance().create_data_type(_return_type, nullable); + bool nullable = true; + if (desc.__isset.is_nullable) { + nullable = desc.is_nullable; } + _data_type = DataTypeFactory::instance().create_data_type(_return_type, nullable); +} Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result) { *result = pool->add(new AggFnEvaluator(desc.nodes[0])); @@ -87,8 +87,8 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, M child_expr_name.emplace_back(_input_exprs_ctxs[i]->root()->expr_name()); } - _function = AggregateFunctionSimpleFactory::instance().get(_fn.name.function_name, argument_types, - params, _data_type->is_nullable()); + _function = AggregateFunctionSimpleFactory::instance().get( + _fn.name.function_name, argument_types, params, _data_type->is_nullable()); if (_function == nullptr) { return Status::InternalError( fmt::format("Agg Function {} is not implemented", _fn.name.function_name)); diff --git a/be/src/vec/exprs/vin_predicate.cpp b/be/src/vec/exprs/vin_predicate.cpp index f8c096d8ee..24e7bd87f1 100644 --- a/be/src/vec/exprs/vin_predicate.cpp +++ b/be/src/vec/exprs/vin_predicate.cpp @@ -30,9 +30,7 @@ namespace doris::vectorized { VInPredicate::VInPredicate(const TExprNode& node) - : VExpr(node), - _is_not_in(node.in_predicate.is_not_in), - _is_prepare(false) {} + : VExpr(node), _is_not_in(node.in_predicate.is_not_in), _is_prepare(false) {} Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { diff --git a/be/src/vec/exprs/vinfo_func.cpp b/be/src/vec/exprs/vinfo_func.cpp index d703c3790f..7e86dcefa8 100644 --- a/be/src/vec/exprs/vinfo_func.cpp +++ b/be/src/vec/exprs/vinfo_func.cpp @@ -28,20 +28,20 @@ namespace doris::vectorized { VInfoFunc::VInfoFunc(const TExprNode& node) : VExpr(node) { Field field; switch (_type.type) { - case TYPE_BIGINT: { - field = Int64(node.info_func.int_value); - break; - } - case TYPE_STRING: - case TYPE_CHAR: - case TYPE_VARCHAR: { - field = node.info_func.str_value; - break; - } - default: { - DCHECK(false) << "Invalid type: " << _type.type; - break; - } + case TYPE_BIGINT: { + field = Int64(node.info_func.int_value); + break; + } + case TYPE_STRING: + case TYPE_CHAR: + case TYPE_VARCHAR: { + field = node.info_func.str_value; + break; + } + default: { + DCHECK(false) << "Invalid type: " << _type.type; + break; + } } this->_column_ptr = _data_type->create_column_const(1, field); } @@ -56,4 +56,4 @@ Status VInfoFunc::execute(VExprContext* context, vectorized::Block* block, int* return Status::OK(); } -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/vinfo_func.h b/be/src/vec/exprs/vinfo_func.h index 89ed0f5cf2..ca4dd21b8a 100644 --- a/be/src/vec/exprs/vinfo_func.h +++ b/be/src/vec/exprs/vinfo_func.h @@ -33,9 +33,12 @@ public: VInfoFunc(const TExprNode& node); virtual ~VInfoFunc() {} - virtual VExpr* clone(doris::ObjectPool* pool) const override { return pool->add(new VInfoFunc(*this)); } + virtual VExpr* clone(doris::ObjectPool* pool) const override { + return pool->add(new VInfoFunc(*this)); + } virtual const std::string& expr_name() const override { return _expr_name; } - virtual Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override; + virtual Status execute(VExprContext* context, vectorized::Block* block, + int* result_column_id) override; private: const std::string _expr_name = "vinfofunc expr"; @@ -44,4 +47,3 @@ private: } // namespace vectorized } // namespace doris - diff --git a/be/src/vec/exprs/vslot_ref.cpp b/be/src/vec/exprs/vslot_ref.cpp index 57038eb63f..aa728b9c39 100644 --- a/be/src/vec/exprs/vslot_ref.cpp +++ b/be/src/vec/exprs/vslot_ref.cpp @@ -25,16 +25,13 @@ namespace doris::vectorized { using doris::Status; using doris::SlotDescriptor; VSlotRef::VSlotRef(const doris::TExprNode& node) - : VExpr(node), - _slot_id(node.slot_ref.slot_id), - _column_id(-1), - _column_name(nullptr) { - if (node.__isset.is_nullable) { - _is_nullable = node.is_nullable; - } else { - _is_nullable = true; - } - } + : VExpr(node), _slot_id(node.slot_ref.slot_id), _column_id(-1), _column_name(nullptr) { + if (node.__isset.is_nullable) { + _is_nullable = node.is_nullable; + } else { + _is_nullable = true; + } +} VSlotRef::VSlotRef(const SlotDescriptor* desc) : VExpr(desc->type(), true, desc->is_nullable()), diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.cpp b/be/src/vec/exprs/vtuple_is_null_predicate.cpp index 6a5c3eb017..f7330e991f 100644 --- a/be/src/vec/exprs/vtuple_is_null_predicate.cpp +++ b/be/src/vec/exprs/vtuple_is_null_predicate.cpp @@ -35,7 +35,7 @@ VTupleIsNullPredicate::VTupleIsNullPredicate(const TExprNode& node) node.tuple_is_null_pred.tuple_ids.end()) {} Status VTupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, - VExprContext* context) { + VExprContext* context) { RETURN_IF_ERROR(VExpr::prepare(state, desc, context)); DCHECK_EQ(0, _children.size()); DCHECK_GT(_tuple_ids.size(), 0); @@ -63,8 +63,11 @@ Status VTupleIsNullPredicate::execute(VExprContext* context, Block* block, int* auto* __restrict ans_map = ans->get_data().data(); for (auto col_id : _column_to_check) { - auto* __restrict null_map = reinterpret_cast( - *block->get_by_position(col_id).column).get_null_map_column().get_data().data(); + auto* __restrict null_map = + reinterpret_cast(*block->get_by_position(col_id).column) + .get_null_map_column() + .get_data() + .data(); for (int i = 0; i < target_rows; ++i) { ans_map[i] &= null_map[i] == JOIN_NULL_HINT; diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h b/be/src/vec/exprs/vtuple_is_null_predicate.h index 7854253ff6..c05693e393 100644 --- a/be/src/vec/exprs/vtuple_is_null_predicate.h +++ b/be/src/vec/exprs/vtuple_is_null_predicate.h @@ -25,9 +25,9 @@ public: explicit VTupleIsNullPredicate(const TExprNode& node); ~VTupleIsNullPredicate() override = default; doris::Status execute(VExprContext* context, doris::vectorized::Block* block, - int* result_column_id) override; + int* result_column_id) override; doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, - VExprContext* context) override; + VExprContext* context) override; VExpr* clone(doris::ObjectPool* pool) const override { return pool->add(new VTupleIsNullPredicate(*this)); diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp index 0e3b3315f8..c7aa595925 100644 --- a/be/src/vec/functions/comparison_equal_for_null.cpp +++ b/be/src/vec/functions/comparison_equal_for_null.cpp @@ -55,20 +55,27 @@ public: if (left_nullable == right_nullable) { auto return_type = std::make_shared(); - ColumnsWithTypeAndName eq_columns - { - ColumnWithTypeAndName{left_nullable ? left_column->get_nested_column_ptr() : col_left.column, - left_nullable ? assert_cast - (col_left.type.get())->get_nested_type() : col_left.type, ""}, - ColumnWithTypeAndName{left_nullable ? right_column->get_nested_column_ptr() : col_right.column, - left_nullable ? assert_cast - (col_right.type.get())->get_nested_type() : col_right.type, ""} - }; + ColumnsWithTypeAndName eq_columns { + ColumnWithTypeAndName { + left_nullable ? left_column->get_nested_column_ptr() : col_left.column, + left_nullable + ? assert_cast(col_left.type.get()) + ->get_nested_type() + : col_left.type, + ""}, + ColumnWithTypeAndName {left_nullable ? right_column->get_nested_column_ptr() + : col_right.column, + left_nullable ? assert_cast( + col_right.type.get()) + ->get_nested_type() + : col_right.type, + ""}}; Block temporary_block(eq_columns); - auto func_eq = SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type); + auto func_eq = + SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type); DCHECK(func_eq); - temporary_block.insert(ColumnWithTypeAndName{nullptr, return_type, ""}); + temporary_block.insert(ColumnWithTypeAndName {nullptr, return_type, ""}); func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count); if (left_nullable) { @@ -90,22 +97,23 @@ public: } else { auto return_type = make_nullable(std::make_shared()); - const ColumnsWithTypeAndName eq_columns - { - ColumnWithTypeAndName{col_left.column, col_left.type, ""}, - ColumnWithTypeAndName{col_right.column, col_right.type, ""} - }; - auto func_eq = SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type); + const ColumnsWithTypeAndName eq_columns { + ColumnWithTypeAndName {col_left.column, col_left.type, ""}, + ColumnWithTypeAndName {col_right.column, col_right.type, ""}}; + auto func_eq = + SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type); DCHECK(func_eq); Block temporary_block(eq_columns); - temporary_block.insert(ColumnWithTypeAndName{nullptr, return_type, ""}); + temporary_block.insert(ColumnWithTypeAndName {nullptr, return_type, ""}); func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count); auto res_nullable_column = assert_cast( std::move(*temporary_block.get_by_position(2).column).mutate().get()); auto& null_map = res_nullable_column->get_null_map_data(); - auto& res_map = assert_cast&>(res_nullable_column->get_nested_column()).get_data(); + auto& res_map = + assert_cast&>(res_nullable_column->get_nested_column()) + .get_data(); auto* __restrict res = res_map.data(); auto* __restrict l = null_map.data(); @@ -122,4 +130,4 @@ public: void register_function_comparison_eq_for_null(SimpleFunctionFactory& factory) { factory.register_function(); } -} \ No newline at end of file +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 7640a2707d..b148746490 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -33,14 +33,14 @@ namespace doris::vectorized { -#define TIME_FUNCTION_IMPL(CLASS, UNIT, FUNCTION) \ - struct CLASS { \ - static constexpr auto name = #UNIT; \ - static inline auto execute(const Int64& t, bool& is_null) { \ +#define TIME_FUNCTION_IMPL(CLASS, UNIT, FUNCTION) \ + struct CLASS { \ + static constexpr auto name = #UNIT; \ + static inline auto execute(const Int64& t, bool& is_null) { \ const auto& date_time_value = (doris::vectorized::VecDateTimeValue&)(t); \ - is_null = !date_time_value.is_valid_date(); \ - return date_time_value.FUNCTION; \ - } \ + is_null = !date_time_value.is_valid_date(); \ + return date_time_value.FUNCTION; \ + } \ } #define TO_TIME_FUNCTION(CLASS, UNIT) TIME_FUNCTION_IMPL(CLASS, UNIT, UNIT()) @@ -60,18 +60,18 @@ TIME_FUNCTION_IMPL(DayOfWeekImpl, dayofweek, day_of_week()); // TODO: the method should be always not nullable TIME_FUNCTION_IMPL(ToDaysImpl, to_days, daynr()); -#define TIME_FUNCTION_ONE_ARG_IMPL(CLASS, UNIT, FUNCTION) \ - struct CLASS { \ - static constexpr auto name = #UNIT; \ - static inline auto execute(const Int64& t, bool& is_null) { \ +#define TIME_FUNCTION_ONE_ARG_IMPL(CLASS, UNIT, FUNCTION) \ + struct CLASS { \ + static constexpr auto name = #UNIT; \ + static inline auto execute(const Int64& t, bool& is_null) { \ const auto& date_time_value = (doris::vectorized::VecDateTimeValue&)(t); \ - is_null = !date_time_value.is_valid_date(); \ - return date_time_value.FUNCTION; \ - } \ - \ - static DataTypes get_variadic_argument_types() { \ - return {std::make_shared()}; \ - } \ + is_null = !date_time_value.is_valid_date(); \ + return date_time_value.FUNCTION; \ + } \ + \ + static DataTypes get_variadic_argument_types() { \ + return {std::make_shared()}; \ + } \ } TIME_FUNCTION_ONE_ARG_IMPL(ToWeekOneArgImpl, week, week(mysql_week_mode(0))); @@ -151,19 +151,19 @@ struct DateFormatImpl { if (format.size > 128) { offset += 1; res_data.emplace_back(0); - return std::pair{offset, true}; + return std::pair {offset, true}; } char buf[128]; if (!dt.to_format_string(format.data, format.size, buf)) { offset += 1; res_data.emplace_back(0); - return std::pair{offset, true}; + return std::pair {offset, true}; } auto len = strlen(buf) + 1; res_data.insert(buf, buf + len); offset += len; - return std::pair{offset, false}; + return std::pair {offset, false}; } }; @@ -182,20 +182,20 @@ struct FromUnixTimeImpl { if (format.size > 128 || val < 0 || val > INT_MAX || !dt.from_unixtime(val, time_zone)) { offset += 1; res_data.emplace_back(0); - return std::pair{offset, true}; + return std::pair {offset, true}; } char buf[128]; if (!dt.to_format_string(format.data, format.size, buf)) { offset += 1; res_data.emplace_back(0); - return std::pair{offset, true}; + return std::pair {offset, true}; } auto len = strlen(buf) + 1; res_data.insert(buf, buf + len); offset += len; - return std::pair{offset, false}; + return std::pair {offset, false}; } }; @@ -213,7 +213,7 @@ struct TransformerToStringOneArgument { const auto& t = ts[i]; const auto& date_time_value = reinterpret_cast(t); res_offsets[i] = Transform::execute(date_time_value, res_data, offset, - reinterpret_cast(null_map[i])); + reinterpret_cast(null_map[i])); } } }; @@ -243,7 +243,7 @@ struct TransformerToStringTwoArgument { template struct Transformer { static void vector(const PaddedPODArray& vec_from, PaddedPODArray& vec_to, - NullMap& null_map) { + NullMap& null_map) { size_t size = vec_from.size(); vec_to.resize(size); null_map.resize_fill(size, false); @@ -265,8 +265,8 @@ struct DateTimeTransformImpl { auto col_to = ColumnVector::create(); auto null_map = ColumnVector::create(); Op::vector(sources->get_data(), col_to->get_data(), null_map->get_data()); - block.replace_by_position(result, - ColumnNullable::create(std::move(col_to), std::move(null_map))); + block.replace_by_position( + result, ColumnNullable::create(std::move(col_to), std::move(null_map))); } else { return Status::RuntimeError(fmt::format( "Illegal column {} of first argument of function {}", diff --git a/be/src/vec/functions/divide.cpp b/be/src/vec/functions/divide.cpp index 71de120c98..b6d3a2b35e 100644 --- a/be/src/vec/functions/divide.cpp +++ b/be/src/vec/functions/divide.cpp @@ -18,7 +18,6 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/divide.cpp // and modified by Doris - #include "vec/functions/function_binary_arithmetic_to_null_type.h" #include "vec/functions/simple_function_factory.h" @@ -32,7 +31,8 @@ struct DivideFloatingImpl { static const constexpr bool allow_decimal = true; template - static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map, size_t index) { + static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map, + size_t index) { null_map[index] = b.is_zero(); return a / (b.is_zero() ? one : b); } diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 8ec15c1b86..bda271be0d 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -269,7 +269,8 @@ Status PreparedFunctionImpl::execute(FunctionContext* context, Block& block, // res.column = block_without_low_cardinality.safe_get_by_position(result).column; // } // } else - return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count, dry_run); + return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count, + dry_run); } void FunctionBuilderImpl::check_number_of_arguments(size_t number_of_arguments) const { diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h index dd36158646..744b55a09f 100644 --- a/be/src/vec/functions/function_binary_arithmetic.h +++ b/be/src/vec/functions/function_binary_arithmetic.h @@ -456,7 +456,7 @@ template