From adb321331469f34b8434131823e097bf985ee09b Mon Sep 17 00:00:00 2001 From: LingBin Date: Thu, 21 Sep 2017 20:27:49 +0800 Subject: [PATCH] Do not show token and file path in load error url (#114) --- be/src/http/download_action.cpp | 92 +++++++++++++++++++++++++------- be/src/http/download_action.h | 21 +++++++- be/src/olap/olap_define.h | 1 + be/src/runtime/etl_job_mgr.cpp | 10 +++- be/src/runtime/etl_job_mgr.h | 1 + be/src/runtime/exec_env.cpp | 7 +++ be/src/runtime/load_path_mgr.cpp | 67 +++++++++++++++++++---- be/src/runtime/load_path_mgr.h | 16 +++++- be/src/runtime/runtime_state.cpp | 18 ++++--- 9 files changed, 194 insertions(+), 39 deletions(-) diff --git a/be/src/http/download_action.cpp b/be/src/http/download_action.cpp index fe0b5822c4..0edbdb3bcd 100644 --- a/be/src/http/download_action.cpp +++ b/be/src/http/download_action.cpp @@ -40,15 +40,22 @@ const std::string TOKEN_PARAMETER = "token"; DownloadAction::DownloadAction(ExecEnv* exec_env, const std::vector& allow_dirs) : _exec_env(exec_env), + _download_type(NORMAL), _allow_paths(allow_dirs) { + } -void DownloadAction::handle(HttpRequest *req, HttpChannel *channel) { - LOG(INFO) << "accept one download request " << req->debug_string(); +DownloadAction::DownloadAction(ExecEnv* exec_env, const std::string& error_log_root_dir) : + _exec_env(exec_env), + _download_type(ERROR_LOG), + _error_log_root_dir(error_log_root_dir) { - // add tid to cgroup in order to limit read bandwidth - CgroupsMgr::apply_system_cgroup(); +} +void DownloadAction::handle_normal( + HttpRequest *req, + HttpChannel *channel, + const std::string& file_param) { // check token Status status; if (config::enable_token_check) { @@ -61,6 +68,51 @@ void DownloadAction::handle(HttpRequest *req, HttpChannel *channel) { } } + status = check_path_is_allowed(file_param); + if (!status.ok()) { + std::string error_msg = status.get_error_msg(); + HttpResponse response(HttpStatus::OK, &error_msg); + channel->send_response(response); + return; + } + + if (FileUtils::is_dir(file_param)) { + do_dir_response(file_param, req, channel); + } else { + do_file_response(file_param, req, channel); + } +} + +void DownloadAction::handle_error_log( + HttpRequest *req, + HttpChannel *channel, + const std::string& file_param) { + const std::string absolute_path = _error_log_root_dir + "/" + file_param; + + Status status = check_log_path_is_allowed(absolute_path); + if (!status.ok()) { + std::string error_msg = status.get_error_msg(); + HttpResponse response(HttpStatus::OK, &error_msg); + channel->send_response(response); + return; + } + + if (FileUtils::is_dir(absolute_path)) { + std::string error_msg = "error log can only be file."; + HttpResponse response(HttpStatus::OK, &error_msg); + channel->send_response(response); + return; + } + + do_file_response(absolute_path, req, channel); +} + +void DownloadAction::handle(HttpRequest *req, HttpChannel *channel) { + LOG(INFO) << "accept one download request " << req->debug_string(); + + // add tid to cgroup in order to limit read bandwidth + CgroupsMgr::apply_system_cgroup(); + // Get 'file' parameter, then assembly file absolute path const std::string& file_path = req->param(FILE_PARAMETER); if (file_path.empty()) { @@ -71,23 +123,13 @@ void DownloadAction::handle(HttpRequest *req, HttpChannel *channel) { return; } - status = check_path(file_path); - if (!status.ok()) { - std::string error_msg = status.get_error_msg(); - HttpResponse response(HttpStatus::OK, &error_msg); - channel->send_response(response); - return; + if (_download_type == ERROR_LOG) { + handle_error_log(req, channel, file_path); + } else if (_download_type == NORMAL) { + handle_normal(req, channel, file_path); } - VLOG_ROW << "absolute download path: " << file_path; - - if (FileUtils::is_dir(file_path)) { - do_dir_response(file_path, req, channel); - return; - } else { - do_file_response(file_path, req, channel); - } - LOG(INFO) << "deal with requesst finished! "; + LOG(INFO) << "deal with download requesst finished! "; } void DownloadAction::do_dir_response( @@ -236,7 +278,8 @@ Status DownloadAction::check_token(HttpRequest *req) { return Status::OK; } -Status DownloadAction::check_path(const std::string& file_path) { +Status DownloadAction::check_path_is_allowed(const std::string& file_path) { + DCHECK_EQ(_download_type, NORMAL); for (auto& allow_path : _allow_paths) { if (FileSystemUtil::contain_path(allow_path, file_path)) { return Status::OK; @@ -246,5 +289,14 @@ Status DownloadAction::check_path(const std::string& file_path) { return Status("file path Not Allowed."); } +Status DownloadAction::check_log_path_is_allowed(const std::string& file_path) { + DCHECK_EQ(_download_type, ERROR_LOG); + if (FileSystemUtil::contain_path(_error_log_root_dir, file_path)) { + return Status::OK; + } + + return Status("file path Not Allowed."); +} + } // end namespace palo diff --git a/be/src/http/download_action.h b/be/src/http/download_action.h index 3d843d768b..dca1253688 100644 --- a/be/src/http/download_action.h +++ b/be/src/http/download_action.h @@ -33,13 +33,28 @@ class DownloadAction : public HttpHandler { public: DownloadAction(ExecEnv* exec_env, const std::vector& allow_dirs); + // for load error + DownloadAction(ExecEnv* exec_env, const std::string& error_log_root_dir); + virtual ~DownloadAction() {} virtual void handle(HttpRequest *req, HttpChannel *channel); private: + enum DOWNLOAD_TYPE { + NORMAL = 1, + ERROR_LOG = 2, + }; + Status check_token(HttpRequest *req); - Status check_path(const std::string& path); + Status check_path_is_allowed(const std::string& path); + Status check_log_path_is_allowed(const std::string& file_path); + + void handle_normal(HttpRequest *req, HttpChannel *channel, const std::string& file_param); + void handle_error_log( + HttpRequest *req, + HttpChannel *channel, + const std::string& file_param); void do_file_response(const std::string& dir_path, HttpRequest *req, HttpChannel *channel); void do_dir_response(const std::string& dir_path, HttpRequest *req, HttpChannel *channel); @@ -55,7 +70,11 @@ private: std::string get_content_type(const std::string& file_name); ExecEnv* _exec_env; + DOWNLOAD_TYPE _download_type; + std::vector _allow_paths; + std::string _error_log_root_dir; + }; // end class DownloadAction diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index 85543aa7e3..45e525d459 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -62,6 +62,7 @@ static const std::string DPP_PREFIX = "/dpp_download"; static const std::string SNAPSHOT_PREFIX = "/snapshot"; static const std::string TRASH_PREFIX = "/trash"; static const std::string UNUSED_PREFIX = "/unused"; +static const std::string ERROR_LOG_PREFIX = "/error_log"; static const int32_t OLAP_DATA_VERSION_APPLIED = PALO_V1; diff --git a/be/src/runtime/etl_job_mgr.cpp b/be/src/runtime/etl_job_mgr.cpp index 7e35c8453b..756a29b246 100644 --- a/be/src/runtime/etl_job_mgr.cpp +++ b/be/src/runtime/etl_job_mgr.cpp @@ -45,6 +45,14 @@ std::string EtlJobMgr::to_http_path(const std::string& file_name) { return url.str(); } +std::string EtlJobMgr::to_load_error_http_path(const std::string& file_name) { + std::stringstream url; + url << "http://" << BackendOptions::get_localhost() << ":" << config::webserver_port + << "/api/_load_error_log?" + << "&file=" << file_name; + return url.str(); +} + const std::string DPP_NORMAL_ALL = "dpp.norm.ALL"; const std::string DPP_ABNORMAL_ALL = "dpp.abnorm.ALL"; const std::string ERROR_FILE_PREFIX = "error_log"; @@ -239,7 +247,7 @@ Status EtlJobMgr::get_job_state(const TUniqueId& id, if (!ctx.result.debug_path.empty()) { result->__set_tracking_url( - to_http_path(ctx.result.debug_path)); + to_load_error_http_path(ctx.result.debug_path)); } return Status::OK; } diff --git a/be/src/runtime/etl_job_mgr.h b/be/src/runtime/etl_job_mgr.h index 7bdde9ffa0..dc674d1570 100644 --- a/be/src/runtime/etl_job_mgr.h +++ b/be/src/runtime/etl_job_mgr.h @@ -90,6 +90,7 @@ public: private: std::string to_http_path(const std::string& file_path); + std::string to_load_error_http_path(const std::string& file_path); void report_to_master(PlanFragmentExecutor* executor); diff --git a/be/src/runtime/exec_env.cpp b/be/src/runtime/exec_env.cpp index 6533d55d25..a65ce5240b 100644 --- a/be/src/runtime/exec_env.cpp +++ b/be/src/runtime/exec_env.cpp @@ -190,6 +190,13 @@ Status ExecEnv::start_webserver() { "/api/_tablet/_download", tablet_download_action); + DownloadAction* error_log_download_action = new DownloadAction( + this, _load_path_mgr->get_load_error_file_dir()); + _webserver->register_handler( + HttpMethod::GET, "/api/_load_error_log", error_log_download_action); + _webserver->register_handler( + HttpMethod::HEAD, "/api/_load_error_log", error_log_download_action); + // Register monitor MonitorAction* monitor_action = new MonitorAction(); monitor_action->register_module("etl_mgr", etl_job_mgr()); diff --git a/be/src/runtime/load_path_mgr.cpp b/be/src/runtime/load_path_mgr.cpp index 5433b20bb9..12483fb89d 100644 --- a/be/src/runtime/load_path_mgr.cpp +++ b/be/src/runtime/load_path_mgr.cpp @@ -25,6 +25,7 @@ #include "olap/olap_define.h" #include "olap/olap_rootpath.h" #include "util/file_utils.h" +#include "gen_cpp/Types_types.h" namespace palo { @@ -37,8 +38,13 @@ Status LoadPathMgr::init() { for (auto& one_path : all_available_root_path) { _path_vec.push_back(one_path + MINI_PREFIX); } - LOG(INFO) << "Load path configured to [" << boost::join(_path_vec, ",") << "]"; + + // error log is saved in first root path + _error_log_dir = all_available_root_path[0] + ERROR_LOG_PREFIX; + // check and make dir + RETURN_IF_ERROR(FileUtils::create_dir(_error_log_dir)); + _idx = 0; _reserved_hours = std::max(config::load_data_reserve_hours, 1L); pthread_create(&_cleaner_id, nullptr, LoadPathMgr::cleaner, this); @@ -83,21 +89,16 @@ Status LoadPathMgr::allocate_dir( return status; } -bool LoadPathMgr::can_delete_label(time_t cur_time, const std::string& label_dir) { +bool LoadPathMgr::is_too_old(time_t cur_time, const std::string& label_dir) { struct stat dir_stat; if (stat(label_dir.c_str(), &dir_stat)) { char buf[64]; // State failed, just information - LOG(WARNING) << "stat directory failed.path=" << label_dir + LOG(WARNING) << "stat directory failed.path=" << label_dir << ",code=" << strerror_r(errno, buf, 64); return false; } - if (!S_ISDIR(dir_stat.st_mode)) { - // Not a directory - return false; - } - if ((cur_time - dir_stat.st_mtime) < _reserved_hours * 3600) { return false; } @@ -110,6 +111,29 @@ void LoadPathMgr::get_load_data_path(std::vector* data_paths) { return; } +const std::string ERROR_FILE_NAME = "error_log"; + +Status LoadPathMgr::get_load_error_file_name( + const std::string& db, + const std::string&label, + const TUniqueId& fragment_instance_id, + std::string* error_path) { + std::stringstream ss; + ss << ERROR_FILE_NAME << "_" << db << "_" << label + << "_" << std::hex << fragment_instance_id.hi + << "_" << fragment_instance_id.lo; + *error_path = ss.str(); + return Status::OK; +} + +std::string LoadPathMgr::get_load_error_absolute_path(const std::string& file_name) { + std::string path; + path.append(_error_log_dir); + path.append("/"); + path.append(file_name); + return path; +} + void LoadPathMgr::clean_one_path(const std::string& path) { std::vector dbs; Status status = FileUtils::scan_dir(path, &dbs); @@ -130,7 +154,7 @@ void LoadPathMgr::clean_one_path(const std::string& path) { // delete this file for (auto& label : labels) { std::string label_dir = db_dir + "/" + label; - if (!can_delete_label(now, label_dir)) { + if (!is_too_old(now, label_dir)) { continue; } LOG(INFO) << "Going to remove load directory. path=" << label_dir; @@ -148,6 +172,31 @@ void LoadPathMgr::clean() { for (auto& path : _path_vec) { clean_one_path(path); } + clean_error_log(); +} + +void LoadPathMgr::clean_error_log() { + time_t now = time(nullptr); + std::vector error_logs; + Status status = FileUtils::scan_dir(_error_log_dir, &error_logs); + if (!status.ok()) { + LOG(WARNING) << "scan error_log dir failed. dir=" << _error_log_dir; + return; + } + + for (auto& error_log : error_logs) { + std::string log_path = _error_log_dir + "/" + error_log; + if (!is_too_old(now, log_path)) { + continue; + } + LOG(INFO) << "Going to remove error log file. path=" << log_path; + status = FileUtils::remove_all(log_path); + if (status.ok()) { + LOG(INFO) << "Remove load directory success. path=" << log_path; + } else { + LOG(WARNING) << "Remove load directory failed. path=" << log_path; + } + } } } diff --git a/be/src/runtime/load_path_mgr.h b/be/src/runtime/load_path_mgr.h index 07d5cf430f..97147bc365 100644 --- a/be/src/runtime/load_path_mgr.h +++ b/be/src/runtime/load_path_mgr.h @@ -24,6 +24,8 @@ namespace palo { +class TUniqueId; + // In every directory, '.trash' directory is used to save data need to delete // daemon thread is check no used directory to delete class LoadPathMgr { @@ -39,9 +41,20 @@ public: void get_load_data_path(std::vector* data_paths); + Status get_load_error_file_name( + const std::string& db, + const std::string&label, + const TUniqueId& fragment_instance_id, + std::string* error_path); + std::string get_load_error_absolute_path(const std::string& file_name); + const std::string& get_load_error_file_dir() const { + return _error_log_dir; + } + private: - bool can_delete_label(time_t cur_time, const std::string& label_dir); + bool is_too_old(time_t cur_time, const std::string& label_dir); void clean_one_path(const std::string& path); + void clean_error_log(); void clean(); static void* cleaner(void* param); @@ -51,6 +64,7 @@ private: int _idx; int _reserved_hours; pthread_t _cleaner_id; + std::string _error_log_dir; }; } diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 6fc316de0d..24e0b673ab 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -320,14 +320,18 @@ Status RuntimeState::create_load_dir() { Status RuntimeState::create_error_log_file() { // Make sure that load dir exists. - create_load_dir(); + // create_load_dir(); - std::stringstream ss; - ss << load_dir() << "/" << ERROR_FILE_NAME - << "_" << std::hex << fragment_instance_id().hi - << "_" << fragment_instance_id().lo; - _error_log_file_path = ss.str(); - _error_log_file = new std::ofstream(_error_log_file_path, std::ifstream::out); + _exec_env->load_path_mgr()->get_load_error_file_name( + _db_name, _import_label, _fragment_instance_id, &_error_log_file_path); + // std::stringstream ss; + // ss << load_dir() << "/" << ERROR_FILE_NAME + // << "_" << std::hex << fragment_instance_id().hi + // << "_" << fragment_instance_id().lo; + // _error_log_file_path = ss.str(); + std::string error_log_absolute_path + = _exec_env->load_path_mgr()->get_load_error_absolute_path(_error_log_file_path); + _error_log_file = new std::ofstream(error_log_absolute_path, std::ifstream::out); if (!_error_log_file->is_open()) { std::stringstream error_msg; error_msg << "Fail to open error file: [" << _error_log_file_path << "].";