From 569d0bb3af4cba51ff0e3b736ae9f360beaf90da Mon Sep 17 00:00:00 2001 From: LingBin Date: Tue, 26 Nov 2019 08:22:14 -0600 Subject: [PATCH] Replace all remaining boost::split() with strings::split() (#2302) --- be/src/exec/csv_scan_node.cpp | 6 --- be/src/runtime/routine_load/data_consumer.cpp | 21 +++++---- be/src/runtime/small_file_mgr.cpp | 16 +++---- be/src/runtime/user_function_cache.cpp | 14 +++--- be/src/service/backend_options.cpp | 6 +-- be/src/util/cidr.cpp | 5 +- be/src/util/disk_info.cpp | 5 +- be/src/util/file_utils.cpp | 46 ++++++++----------- be/src/util/mem_info.cpp | 6 +-- be/src/util/string_util.cpp | 9 ++-- 10 files changed, 59 insertions(+), 75 deletions(-) diff --git a/be/src/exec/csv_scan_node.cpp b/be/src/exec/csv_scan_node.cpp index fbcd3ae50f..4eda57a15f 100644 --- a/be/src/exec/csv_scan_node.cpp +++ b/be/src/exec/csv_scan_node.cpp @@ -20,10 +20,6 @@ #include #include -#include -#include -#include - #include #include "exec/text_converter.hpp" @@ -61,7 +57,6 @@ public: } char const* c_str() const { - return _begin; } char const* begin() const { @@ -559,7 +554,6 @@ bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) std::vector fields; { SCOPED_TIMER(_split_line_timer); - // boost::split(fields, line, boost::is_any_of(_column_separator)); split_line(line, _column_separator[0], fields); } diff --git a/be/src/runtime/routine_load/data_consumer.cpp b/be/src/runtime/routine_load/data_consumer.cpp index f0a6b96e76..ca2675aa78 100644 --- a/be/src/runtime/routine_load/data_consumer.cpp +++ b/be/src/runtime/routine_load/data_consumer.cpp @@ -22,6 +22,8 @@ #include #include +#include "gutil/strings/split.h" + #include "common/status.h" #include "service/backend_options.h" #include "runtime/small_file_mgr.h" @@ -34,14 +36,14 @@ namespace doris { // init kafka consumer will only set common configs such as // brokers, groupid Status KafkaDataConsumer::init(StreamLoadContext* ctx) { - std::unique_lock l(_lock); + std::unique_lock l(_lock); if (_init) { // this consumer has already been initialized. return Status::OK(); } RdKafka::Conf *conf = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL); - + // conf has to be deleted finally auto conf_deleter = [conf] () { delete conf; }; DeferOp delete_conf(std::bind(conf_deleter)); @@ -84,8 +86,8 @@ Status KafkaDataConsumer::init(StreamLoadContext* ctx) { for (auto& item : ctx->kafka_info->properties) { if (boost::algorithm::starts_with(item.second, "FILE:")) { // file property should has format: FILE:file_id:md5 - std::vector parts; - boost::split(parts, item.second, boost::is_any_of(":")); + std::vector parts = strings::Split( + item.second, ":", strings::SkipWhitespace()); if (parts.size() != 3) { return Status::InternalError("PAUSE: Invalid file property of kafka: " + item.second); } @@ -94,7 +96,8 @@ Status KafkaDataConsumer::init(StreamLoadContext* ctx) { Status st = ctx->exec_env()->small_file_mgr()->get_file(file_id, parts[2], &file_path); if (!st.ok()) { std::stringstream ss; - ss << "PAUSE: failed to get file for config: " << item.first << ", error: " << st.get_error_msg(); + ss << "PAUSE: failed to get file for config: " << item.first + << ", error: " << st.get_error_msg(); return Status::InternalError(ss.str()); } RETURN_IF_ERROR(set_conf(item.first, file_path)); @@ -112,7 +115,7 @@ Status KafkaDataConsumer::init(StreamLoadContext* ctx) { } // create consumer - _k_consumer = RdKafka::KafkaConsumer::create(conf, errstr); + _k_consumer = RdKafka::KafkaConsumer::create(conf, errstr); if (!_k_consumer) { LOG(WARNING) << "PAUSE: failed to create kafka consumer: " << errstr; return Status::InternalError("PAUSE: failed to create kafka consumer: " + errstr); @@ -263,7 +266,7 @@ Status KafkaDataConsumer::get_partition_meta(std::vector* partition_ids if ((*it)->topic() != _topic) { continue; } - + if ((*it)->err() != RdKafka::ERR_NO_ERROR) { std::stringstream ss; ss << "error: " << err2str((*it)->err()); @@ -284,7 +287,7 @@ Status KafkaDataConsumer::get_partition_meta(std::vector* partition_ids return Status::InternalError("no partition in this topic"); } - return Status::OK(); + return Status::OK(); } Status KafkaDataConsumer::cancel(StreamLoadContext* ctx) { @@ -309,7 +312,7 @@ Status KafkaDataConsumer::commit(std::vector& offset) if (err != RdKafka::ERR_NO_ERROR) { std::stringstream ss; ss << "failed to commit kafka offset : " << RdKafka::err2str(err); - return Status::InternalError(ss.str()); + return Status::InternalError(ss.str()); } return Status::OK(); } diff --git a/be/src/runtime/small_file_mgr.cpp b/be/src/runtime/small_file_mgr.cpp index 7b6c80fbcb..7954b04e0d 100644 --- a/be/src/runtime/small_file_mgr.cpp +++ b/be/src/runtime/small_file_mgr.cpp @@ -21,10 +21,11 @@ #include #include -#include // boost::split #include // boost::algorithm::starts_with #include // boost::is_any_of +#include "gutil/strings/split.h" + #include "common/status.h" #include "env/env.h" #include "gen_cpp/HeartbeatService.h" @@ -74,14 +75,13 @@ Status SmallFileMgr::_load_single_file( const std::string& file_name) { // file name format should be like: // file_id.md5 - std::vector parts; - boost::split(parts, file_name, boost::is_any_of(".")); + std::vector parts = strings::Split(file_name, "."); if (parts.size() != 2) { return Status::InternalError("Not a valid file name: " + file_name); } int64_t file_id = std::stol(parts[0]); std::string md5 = parts[1]; - + if (_file_cache.find(file_id) != _file_cache.end()) { return Status::InternalError("File with same id is already been loaded: " + file_id); } @@ -95,7 +95,7 @@ Status SmallFileMgr::_load_single_file( CacheEntry entry; entry.path = path + "/" + file_name; entry.md5 = file_md5; - + _file_cache.emplace(file_id, entry); return Status::OK(); } @@ -106,7 +106,7 @@ Status SmallFileMgr::get_file( std::string* file_path) { std::unique_lock l(_lock); - // find in cache + // find in cache auto it = _file_cache.find(file_id); if (it != _file_cache.end()) { // find the cached file, check it @@ -130,7 +130,7 @@ Status SmallFileMgr::get_file( // file not found in cache. download it from FE RETURN_IF_ERROR(_download_file(file_id, md5, file_path)); - return Status::OK(); + return Status::OK(); } Status SmallFileMgr::_check_file(const CacheEntry& entry, const std::string& md5) { @@ -226,7 +226,7 @@ Status SmallFileMgr::_download_file( entry.md5 = md5; _file_cache.emplace(file_id, entry); - *file_path = real_file_path; + *file_path = real_file_path; LOG(INFO) << "finished to download file: " << file_path; return Status::OK(); diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index 9e15435319..531998045a 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -20,10 +20,11 @@ #include #include -#include // boost::split #include // boost::algorithm::ends_with #include // boost::is_any_of +#include "gutil/strings/split.h" + #include "env/env.h" #include "http/http_client.h" #include "util/dynamic_util.h" @@ -35,7 +36,7 @@ namespace doris { static const int kLibShardNum = 128; -// function cache entry, store information for +// function cache entry, store information for struct UserFunctionCacheEntry { UserFunctionCacheEntry(int64_t fid_, const std::string& checksum_, const std::string& lib_file_) @@ -118,7 +119,7 @@ Status UserFunctionCache::init(const std::string& lib_dir) { _lib_dir = lib_dir; // 1. dynamic open current process RETURN_IF_ERROR(dynamic_open(nullptr, &_current_process_handle)); - // 2. load all cached + // 2. load all cached RETURN_IF_ERROR(_load_cached_lib()); return Status::OK(); } @@ -128,8 +129,7 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std return Status::InternalError("unknown library file format"); } - std::vector split_parts; - boost::split(split_parts, file, boost::is_any_of(".")); + std::vector split_parts = strings::Split(file, "."); if (split_parts.size() != 3) { return Status::InternalError("user function's name should be function_id.checksum.so"); } @@ -283,7 +283,7 @@ void UserFunctionCache::_destroy_cache_entry(UserFunctionCacheEntry* entry) { entry->unref(); } entry->should_delete_library.store(true); - // now we need to drop + // now we need to drop if (entry->unref()) { delete entry; } @@ -353,7 +353,7 @@ Status UserFunctionCache::_download_lib( << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, 64); return Status::InternalError("fail to rename file"); } - + // check download entry->is_downloaded = true; return Status::OK(); diff --git a/be/src/service/backend_options.cpp b/be/src/service/backend_options.cpp index 2ac054c0ee..10c7867496 100644 --- a/be/src/service/backend_options.cpp +++ b/be/src/service/backend_options.cpp @@ -19,7 +19,7 @@ #include -#include +#include "gutil/strings/split.h" #include "common/logging.h" #include "common/status.h" @@ -87,8 +87,8 @@ bool BackendOptions::analyze_priority_cidrs() { } LOG(INFO) << "priority cidrs in conf: " << config::priority_networks; - std::vector cidr_strs; - boost::split(cidr_strs, config::priority_networks, boost::is_any_of(PRIORITY_CIDR_SEPARATOR)); + std::vector cidr_strs = strings::Split( + config::priority_networks, PRIORITY_CIDR_SEPARATOR); for (auto& cidr_str : cidr_strs) { CIDR cidr; diff --git a/be/src/util/cidr.cpp b/be/src/util/cidr.cpp index cac754ec55..8a70cf6ca0 100644 --- a/be/src/util/cidr.cpp +++ b/be/src/util/cidr.cpp @@ -19,7 +19,7 @@ #include -#include +#include "gutil/strings/split.h" #include "common/logging.h" @@ -44,8 +44,7 @@ bool CIDR::reset(const std::string& cidr_str) { } VLOG(2) << "cidr format str: " << cidr_format_str; - std::vector cidr_items; - boost::split(cidr_items, cidr_format_str, boost::is_any_of("/")); + std::vector cidr_items = strings::Split(cidr_format_str, "/"); if (cidr_items.size() != 2) { LOG(WARNING) << "wrong CIDR format. network=" << cidr_str; return false; diff --git a/be/src/util/disk_info.cpp b/be/src/util/disk_info.cpp index 56287406e5..be3e2f7af2 100644 --- a/be/src/util/disk_info.cpp +++ b/be/src/util/disk_info.cpp @@ -29,6 +29,8 @@ #include #include +#include "gutil/strings/split.h" + namespace doris { bool DiskInfo::_s_initialized; @@ -52,8 +54,7 @@ void DiskInfo::get_device_names() { getline(partitions, line); boost::trim(line); - std::vector fields; - boost::split(fields, line, boost::is_any_of(" "), boost::token_compress_on); + std::vector fields = strings::Split(line, " ", strings::SkipWhitespace()); if (fields.size() != 4) { continue; diff --git a/be/src/util/file_utils.cpp b/be/src/util/file_utils.cpp index 7f5723c83e..e54df9c6c4 100644 --- a/be/src/util/file_utils.cpp +++ b/be/src/util/file_utils.cpp @@ -28,11 +28,11 @@ #include #include -#include -#include #include +#include "gutil/strings/split.h" +#include "gutil/strings/strip.h" #include "gutil/strings/substitute.h" #include "env/env.h" @@ -56,7 +56,7 @@ Status FileUtils::create_dir(const std::string& path, Env* env) { bool is_dir = false; Status s = env->is_directory(partial_path, &is_dir); - + if (s.ok()) { if (is_dir) { // It's a normal directory. @@ -66,7 +66,7 @@ Status FileUtils::create_dir(const std::string& path, Env* env) { // Maybe a file or a symlink. Let's try to follow the symlink. string real_partial_path; RETURN_IF_ERROR(env->canonicalize(partial_path, &real_partial_path)); - + RETURN_IF_ERROR(env->is_directory(real_partial_path, &is_dir)); if (is_dir) { // It's a symlink to a directory. @@ -75,7 +75,7 @@ Status FileUtils::create_dir(const std::string& path, Env* env) { return Status::IOError(partial_path + " exists but is not a directory"); } } - + RETURN_IF_ERROR(env->create_dir_if_missing(partial_path)); } @@ -108,7 +108,7 @@ Status FileUtils::remove_all(const std::string& file_path) { Status FileUtils::remove(const std::string& path, doris::Env* env) { bool is_dir; RETURN_IF_ERROR(env->is_directory(path, &is_dir)); - + if (is_dir) { return env->delete_dir(path); } else { @@ -144,10 +144,10 @@ Status FileUtils::list_dirs_files(const std::string& path, std::set if (is_dot_or_dotdot(name)) { return true; } - + string temp_path = path + "/" + name; bool is_dir; - + auto st = env->is_directory(temp_path, &is_dir); if (st.ok()) { if (is_dir) { @@ -160,10 +160,10 @@ Status FileUtils::list_dirs_files(const std::string& path, std::set } else { LOG(WARNING) << "check path " << path << "is directory error: " << st.to_string(); } - + return true; }; - + return env->iterate_dir(path, cb); } @@ -180,9 +180,9 @@ Status FileUtils::get_children_count(Env* env, const std::string& dir, int64_t* bool FileUtils::is_dir(const std::string& file_path, Env* env) { bool ret; if (env->is_directory(file_path, &ret).ok()) { - return ret; + return ret; } - + return false; } @@ -204,18 +204,10 @@ std::string FileUtils::path_of_fd(int fd) { Status FileUtils::split_pathes(const char* path, std::vector* path_vec) { path_vec->clear(); - try { - boost::split(*path_vec, path, - boost::is_any_of(";"), - boost::token_compress_on); - } catch (...) { - std::stringstream ss; - ss << "Boost split path failed.[path=" << path << "]"; - return Status::InternalError(ss.str()); - } + *path_vec = strings::Split(path, ";", strings::SkipWhitespace()); for (std::vector::iterator it = path_vec->begin(); it != path_vec->end();) { - boost::trim(*it); + StripWhiteSpace(&(*it)); it->erase(it->find_last_not_of("/") + 1); if (it->size() == 0) { @@ -243,7 +235,7 @@ Status FileUtils::split_pathes(const char* path, std::vector* path_ } Status FileUtils::copy_file(const std::string& src_path, const std::string& dest_path) { - // open src file + // open src file FileHandler src_file; if (src_file.open(src_path.c_str(), O_RDONLY) != OLAP_SUCCESS) { char errmsg[64]; @@ -258,7 +250,7 @@ Status FileUtils::copy_file(const std::string& src_path, const std::string& dest LOG(ERROR) << "open file failed: " << dest_path << strerror_r(errno, errmsg, 64); return Status::InternalError("Internal Error"); } - + const int64_t BUF_SIZE = 8192; char *buf = new char[BUF_SIZE]; DeferOp free_buf(std::bind(std::default_delete(), buf)); @@ -284,7 +276,7 @@ Status FileUtils::md5sum(const std::string& file, std::string* md5sum) { if (fd < 0) { return Status::InternalError("failed to open file"); } - + struct stat statbuf; if (fstat(fd, &statbuf) < 0) { close(fd); @@ -295,14 +287,14 @@ Status FileUtils::md5sum(const std::string& file, std::string* md5sum) { unsigned char result[MD5_DIGEST_LENGTH]; MD5((unsigned char*) buf, file_len, result); - munmap(buf, file_len); + munmap(buf, file_len); std::stringstream ss; for (int32_t i = 0; i < MD5_DIGEST_LENGTH; i++) { ss << std::setfill('0') << std::setw(2) << std::hex << (int) result[i]; } ss >> *md5sum; - + close(fd); return Status::OK(); } diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index 2950e3c4f0..68e65742fa 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ -24,8 +24,7 @@ #include #include -#include -#include +#include "gutil/strings/split.h" #include "util/pretty_printer.h" #include "util/string_parser.hpp" @@ -42,8 +41,7 @@ void MemInfo::init() { while (meminfo.good() && !meminfo.eof()) { getline(meminfo, line); - std::vector fields; - boost::split(fields, line, boost::is_any_of(" "), boost::token_compress_on); + std::vector fields = strings::Split(line, " ", strings::SkipWhitespace()); // We expect lines such as, e.g., 'MemTotal: 16129508 kB' if (fields.size() < 3) { diff --git a/be/src/util/string_util.cpp b/be/src/util/string_util.cpp index 738b03da8d..b61dbcea7a 100644 --- a/be/src/util/string_util.cpp +++ b/be/src/util/string_util.cpp @@ -17,17 +17,14 @@ #include "util/string_util.h" +#include "gutil/strings/split.h" + namespace doris { std::size_t hash_of_path(const std::string& identifier, const std::string& path) { std::size_t hash = std::hash()(identifier); - std::vector path_parts; - boost::split(path_parts, path, boost::is_any_of("/")); + std::vector path_parts = strings::Split(path, "/", strings::SkipWhitespace()); for (auto& part : path_parts) { - if (part.empty()) { - continue; - } - boost::hash_combine(hash, part); } return hash;