From a686640ddbccd16a3a286a7704c9f8ce5aa69ed2 Mon Sep 17 00:00:00 2001 From: obdev Date: Sat, 10 Feb 2024 09:18:41 +0000 Subject: [PATCH] wildcard matching --- .../resolver/cmd/ob_load_data_resolver.cpp | 264 ++++++++++++++---- src/sql/resolver/cmd/ob_load_data_resolver.h | 3 + 2 files changed, 208 insertions(+), 59 deletions(-) diff --git a/src/sql/resolver/cmd/ob_load_data_resolver.cpp b/src/sql/resolver/cmd/ob_load_data_resolver.cpp index 006dbde1e2..559a592e33 100644 --- a/src/sql/resolver/cmd/ob_load_data_resolver.cpp +++ b/src/sql/resolver/cmd/ob_load_data_resolver.cpp @@ -25,6 +25,10 @@ #include "sql/resolver/dml/ob_delete_resolver.h" #include "lib/json/ob_json.h" #include "lib/json/ob_json_print_utils.h" +#include "share/backup/ob_backup_io_adapter.h" +#include "share/backup/ob_backup_struct.h" +#include "lib/restore/ob_storage_info.h" +#include namespace oceanbase { @@ -503,6 +507,58 @@ int ObLoadDataResolver::resolve_hints(const ParseNode &node) return ret; } +int ObLoadDataResolver::pattern_match(const ObString& str, const ObString& pattern, bool &matched) +{ + int ret = OB_SUCCESS; + bool *dp = nullptr; + int32_t m = str.length() + 1; + int32_t n = pattern.length() + 1; + ObMemAttr attr(MTL_ID(), "TLD_PATMATCH"); + if (OB_ISNULL(dp = (bool *)ob_malloc(m * n, attr))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret)); + } else { + memset(dp, false, m * n); + dp[0] = true; + for (int32_t j = 1; j < n; j++) { + if (pattern[j - 1] == '*') { + dp[j] = dp[j - 1]; + } + } + for (int32_t i = 1; i < m; i++) { + for (int32_t j = 1; j < n; j++) { + if (pattern[j - 1] == '?' || pattern[j - 1] == str[i - 1]) { + dp[i * n + j] = dp[(i - 1) * n + j - 1]; + } else if (pattern[j - 1] == '*') { + dp[i * n + j] = dp[(i - 1) * n + j] || dp[i * n + j - 1]; + } + } + } + matched = dp[m * n - 1]; + } + if (OB_NOT_NULL(dp)) { + ob_free(dp); + dp = nullptr; + } + + return ret; +} + +bool ObLoadDataResolver::exist_wildcard(const ObString& str) +{ + bool has_wildcard = false; + ObString wildcards("?*[]"); + for (int32_t i = 0; !has_wildcard && i < str.length(); i++) { + for (int32_t j = 0; !has_wildcard && j < wildcards.length(); j++) { + if (str[i] == wildcards[j]) { + has_wildcard = true; + } + } + } + + return has_wildcard; +} + int ObLoadDataResolver::resolve_filename(ObLoadDataStmt *load_stmt, ParseNode *node) { int ret = OB_SUCCESS; @@ -518,77 +574,167 @@ int ObLoadDataResolver::resolve_filename(ObLoadDataStmt *load_stmt, ParseNode *n if (OB_UNLIKELY(file_name.empty())) { ret = OB_FILE_NOT_EXIST; LOG_WARN("file not exist", K(ret), K(file_name)); - } else if (ObLoadFileLocation::OSS != load_args.load_file_storage_) { - load_args.file_name_ = file_name; + } else { const char *p = nullptr; ObString sub_file_name; ObString cstyle_file_name; // ends with '\0' - char *full_path_buf = nullptr; - char *actual_path = nullptr; - if (OB_ISNULL(full_path_buf = static_cast(allocator_->alloc(MAX_PATH_SIZE)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate memory", K(ret)); - } - while (OB_SUCC(ret) && !file_name.empty()) { - p = file_name.find(','); - if (nullptr == p) { - sub_file_name = file_name; - cstyle_file_name = sub_file_name; - file_name.reset(); + if (ObLoadFileLocation::SERVER_DISK == load_args.load_file_storage_) { + load_args.file_name_ = file_name; + char *full_path_buf = nullptr; + char *actual_path = nullptr; + ObArray match_array; + if (OB_ISNULL(full_path_buf = static_cast(allocator_->alloc(MAX_PATH_SIZE)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret)); + } else if (exist_wildcard(file_name)) { + sub_file_name = file_name.trim_space_only(); + if (OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) { + LOG_WARN("fail to write string", K(ret)); + } else { + glob_t glob_result; + int return_value = glob(cstyle_file_name.ptr(), 0, NULL, &glob_result); + if (return_value == GLOB_NOMATCH) { + ret = OB_FILE_NOT_EXIST; + LOG_WARN("No matches found for pattern", K(ret), K(ObString(cstyle_file_name))); + } else if (return_value != 0) { + ret = OB_ERR_SYS; + LOG_WARN("fail to glob", K(ObString(cstyle_file_name))); + } else { + for (size_t i = 0; OB_SUCC(ret) && i < glob_result.gl_pathc; ++i) { + ObString match_file; + if (OB_FAIL(ob_write_string(*allocator_, ObString(glob_result.gl_pathv[i]), match_file, true))) { + LOG_WARN("fail to ob_write_string", K(ret)); + } else if (OB_FAIL(match_array.push_back(match_file))) { + LOG_WARN("fail to push back", K(ret)); + } + } + globfree(&glob_result); + } + } } else { - sub_file_name = file_name.split_on(p); - cstyle_file_name.reset(); + while (OB_SUCC(ret) && !file_name.empty()) { + p = file_name.find(','); + if (nullptr == p) { + sub_file_name = file_name.trim_space_only(); + cstyle_file_name = sub_file_name; + file_name.reset(); + } else { + sub_file_name = file_name.split_on(p).trim_space_only(); + cstyle_file_name.reset(); + } + if (!sub_file_name.empty()) { + if (cstyle_file_name.empty() && OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) { + LOG_WARN("fail to write string", K(ret)); + } else if (OB_FAIL(match_array.push_back(cstyle_file_name))) { + LOG_WARN("fail to push back", K(ret)); + } + } + } } - - if (!sub_file_name.empty()) { - if (cstyle_file_name.empty() && - OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) { - LOG_WARN("fail to write string", KR(ret)); - } else if (ObLoadFileLocation::SERVER_DISK == load_args.load_file_storage_ && - OB_ISNULL(actual_path = realpath(cstyle_file_name.ptr(), full_path_buf))) { + if (OB_SUCC(ret)) { + if (match_array.size() == 0) { ret = OB_FILE_NOT_EXIST; - LOG_WARN("file not exist", K(ret), K(cstyle_file_name)); - } - - //security check for mysql mode - if (OB_SUCC(ret) && ObLoadFileLocation::SERVER_DISK == load_args.load_file_storage_) { - ObString secure_file_priv; - if (OB_FAIL(session_info_->get_secure_file_priv(secure_file_priv))) { - LOG_WARN("failed to get secure file priv", K(ret)); - } else if (OB_FAIL( - ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) { - LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv), - K(actual_path)); - } - } - - if (OB_SUCC(ret)) { - if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_ && load_args.file_iter_.count() != 0) { - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "load multi files not supported"); - } else if (OB_FAIL(load_args.file_iter_.add_files(&cstyle_file_name))) { - LOG_WARN("fail to add files", KR(ret)); + LOG_WARN("files not exists", K(ret)); + } else { + for (int32_t i = 0; OB_SUCC(ret) && i < match_array.size(); i++) { + //security check for mysql mode + ObString secure_file_priv; + if (OB_ISNULL(actual_path = realpath(match_array[i].ptr(), full_path_buf))) { + ret = OB_FILE_NOT_EXIST; + LOG_WARN("file not exist", K(ret), K(i), K(match_array[i])); + } else if (OB_FAIL(session_info_->get_secure_file_priv(secure_file_priv))) { + LOG_WARN("failed to get secure file priv", K(ret)); + } else if (OB_FAIL(ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) { + LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv), K(actual_path)); + } else if (OB_FAIL(load_args.file_iter_.add_files(&match_array[i]))) { + LOG_WARN("fail to add files", K(ret)); + } } } } - } - } else { - ObString temp_file_name = file_name.split_on('?'); - ObString storage_info; - if (OB_FAIL(ob_write_string(*allocator_, temp_file_name, load_args.file_name_, true))) { - LOG_WARN("fail to copy string", K(ret)); - } else if (OB_FAIL(ob_write_string(*allocator_, file_name, storage_info, true))) { - LOG_WARN("fail to copy string", K(ret)); - } else if (temp_file_name.length() <= 0 || storage_info.length() <= 0) { - ret = OB_INVALID_ARGUMENT; - LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key"); - } else if (OB_FAIL(load_args.access_info_.set(load_args.file_name_.ptr(), storage_info.ptr()))) { - LOG_WARN("failed to set access info", K(ret)); - } else if (OB_FAIL(load_args.file_iter_.add_files(&load_args.file_name_))) { - LOG_WARN("fail to add files", KR(ret)); + } else if (ObLoadFileLocation::OSS == load_args.load_file_storage_) { + const char *storage_ptr = nullptr; + const char *file_ptr = nullptr; + if (OB_ISNULL(storage_ptr = file_name.reverse_find('?'))) { + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key"); + } else { + ObString temp_file_name = file_name.split_on(storage_ptr).trim_space_only(); + ObString storage_info; + bool matched = false; + ObString pattern; + ObString dir_path; + char *path = nullptr; + int64_t path_len = 0; + ObArray file_list; + if (OB_FAIL(ob_write_string(*allocator_, temp_file_name, load_args.file_name_, true))) { + LOG_WARN("fail to copy string", K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator_, file_name, storage_info, true))) { + LOG_WARN("fail to copy string", K(ret)); + } else if (temp_file_name.length() <= 0 || storage_info.length() <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key"); + } else if (OB_FAIL(load_args.access_info_.set(load_args.file_name_.ptr(), storage_info.ptr()))) { + LOG_WARN("failed to set access info", K(ret)); + } else if (OB_ISNULL(file_ptr = temp_file_name.reverse_find('/'))) { + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name"); + } else { + dir_path.assign_ptr(temp_file_name.ptr(), file_ptr - temp_file_name.ptr() + 1); + pattern.assign_ptr(file_ptr + 1, temp_file_name.length() - dir_path.length()); + if (exist_wildcard(dir_path)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("directory does not support wildcard matching", K(ret)); + } else { + ObBackupIoAdapter adapter; + ObFileListArrayOp op(file_list, *allocator_); + if (OB_ISNULL(path = static_cast(allocator_->alloc(MAX_PATH_SIZE)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret)); + } else if (OB_FAIL(databuff_printf(path, MAX_PATH_SIZE, path_len, "%.*s", + dir_path.length(), dir_path.ptr()))) { + LOG_WARN("fail to fill path", K(ret), K(path_len)); + } else if (OB_FAIL(adapter.list_files(ObString(path_len, path), &load_args.access_info_, op))) { + LOG_WARN("fail to list files", K(ret)); + } + } + } + for (int32_t i = 0; OB_SUCC(ret) && i < file_list.size(); i++) { + if (OB_FAIL(pattern_match(file_list[i], pattern, matched))) { + LOG_WARN("fail to pattern match", K(ret)); + } else if (matched) { + ObString match_file; + int64_t pos = path_len; + if (OB_FAIL(databuff_printf(path, MAX_PATH_SIZE, pos, "%.*s", + file_list[i].length(), file_list[i].ptr()))) { + LOG_WARN("fail to fill path", K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator_, ObString(pos, path), match_file, true))) { + LOG_WARN("fail to copy string", K(ret)); + } else if (OB_FAIL(load_args.file_iter_.add_files(&match_file))) { + LOG_WARN("fail to add files", K(ret)); + } + } + } + if (OB_SUCC(ret) && load_args.file_iter_.count() == 0) { + ret = OB_BACKUP_FILE_NOT_EXIST; + LOG_WARN("No matches found for pattern", K(ret), K(pattern)); + } + } + } else { + if (!file_name.empty()) { + if (OB_NOT_NULL(p = file_name.find(','))) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "load multi files not supported"); + } else if (OB_FAIL(ob_write_string(*allocator_, file_name, cstyle_file_name, true))) { + LOG_WARN("fail to copy string", K(ret)); + } else if (OB_FAIL(load_args.file_iter_.add_files(&cstyle_file_name))) { + LOG_WARN("fail to add files", K(ret)); + } + } } } } + return ret; } diff --git a/src/sql/resolver/cmd/ob_load_data_resolver.h b/src/sql/resolver/cmd/ob_load_data_resolver.h index 47440d23db..0c77ae20a9 100644 --- a/src/sql/resolver/cmd/ob_load_data_resolver.h +++ b/src/sql/resolver/cmd/ob_load_data_resolver.h @@ -62,6 +62,9 @@ public: int local_infile_enabled(bool &enabled) const; int check_trigger_constraint(const ObTableSchema *table_schema); +private: + int pattern_match(const ObString& str, const ObString& pattern, bool &matched); + bool exist_wildcard(const ObString& str); private: enum ParameterEnum { ENUM_OPT_LOCAL = 0,