Refine filtering external files by pattern option

This commit is contained in:
wjhh2008
2023-12-07 04:46:33 +00:00
committed by ob-robot
parent 0e346eecdd
commit 0fe424c626
23 changed files with 282 additions and 208 deletions

View File

@ -41,6 +41,7 @@ namespace oceanbase
{
using namespace observer;
using namespace common;
using namespace sql;
using namespace transaction::tablelock;
namespace share
{
@ -243,17 +244,20 @@ int ObExternalTableFileManager::update_inner_table_file_list(
return ret;
}
int ObExternalTableFileManager::get_external_file_list_on_device(const ObString &location,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes,
const ObString &access_info,
ObIAllocator &allocator)
int ObExternalTableFileManager::get_external_file_list_on_device(
const ObString &location,
const ObString &pattern,
const ObExprRegexpSessionVariables &regexp_vars,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes,
const ObString &access_info,
ObIAllocator &allocator)
{
int ret = OB_SUCCESS;
sql::ObExternalDataAccessDriver driver;
if (OB_FAIL(driver.init(location, access_info))) {
LOG_WARN("init external data access driver failed", K(ret));
} else if (OB_FAIL(driver.get_file_list(location, file_urls, allocator))) {
} else if (OB_FAIL(driver.get_file_list(location, pattern, regexp_vars, file_urls, allocator))) {
LOG_WARN("get file urls failed", K(ret));
} else if (OB_FAIL(driver.get_file_sizes(location, file_urls, file_sizes))) {
LOG_WARN("get file sizes failed", K(ret));

View File

@ -17,6 +17,9 @@
#include "observer/ob_server_struct.h"
namespace oceanbase {
namespace sql {
class ObExprRegexpSessionVariables;
}
namespace share {
@ -124,11 +127,13 @@ public:
const uint64_t table_id,
ObMySQLTransaction &trans);
int get_external_file_list_on_device(const ObString &location,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes,
const ObString &access_info,
ObIAllocator &allocator);
int get_external_file_list_on_device(const common::ObString &location,
const common::ObString &pattern,
const sql::ObExprRegexpSessionVariables &regexp_vars,
common::ObIArray<common::ObString> &file_urls,
common::ObIArray<int64_t> &file_sizes,
const common::ObString &access_info,
common::ObIAllocator &allocator);
private:

View File

@ -39,11 +39,12 @@ int ObAsyncLoadExternalTableFileListP::process()
int ret = OB_SUCCESS;
ObLoadExternalFileListReq &req = arg_;
ObLoadExternalFileListRes &res = result_;
ObString &location = req.location_;
ObSEArray<ObString, 16> file_urls;
ObString access_info;
ObArenaAllocator allocator;
if (OB_FAIL(ObExternalTableFileManager::get_instance().get_external_file_list_on_device(location,
if (OB_FAIL(ObExternalTableFileManager::get_instance().get_external_file_list_on_device(req.location_,
req.pattern_,
req.regexp_vars_,
file_urls,
res.file_sizes_,
access_info,
@ -56,7 +57,7 @@ int ObAsyncLoadExternalTableFileListP::process()
OZ(res.file_urls_.push_back(tmp));
}
res.rcode_.rcode_ = ret;
LOG_DEBUG("get external table file", K(ret), K(location), K(file_urls), K(res.file_urls_));
LOG_DEBUG("get external table file", K(ret), K(req.location_), K(req.pattern_), K(file_urls), K(res.file_urls_));
return ret;
}

View File

@ -23,7 +23,7 @@ OB_SERIALIZE_MEMBER(ObFlushExternalTableFileCacheReq, tenant_id_, table_id_, par
OB_SERIALIZE_MEMBER(ObFlushExternalTableFileCacheRes, rcode_);
OB_SERIALIZE_MEMBER(ObLoadExternalFileListReq, location_);
OB_SERIALIZE_MEMBER(ObLoadExternalFileListReq, location_, pattern_, regexp_vars_);
OB_DEF_SERIALIZE(ObLoadExternalFileListRes)
{

View File

@ -14,6 +14,7 @@
#define OBDEV_SRC_EXTERNAL_TABLE_FILE_TASK_H_
#include "rpc/obrpc/ob_rpc_result_code.h"
#include "deps/oblib/src/lib/lock/ob_thread_cond.h"
#include "sql/engine/expr/ob_expr_regexp_context.h"
namespace oceanbase
{
namespace share
@ -47,9 +48,11 @@ class ObLoadExternalFileListReq
OB_UNIS_VERSION(1);
public:
ObLoadExternalFileListReq() :
location_() {}
location_(), pattern_() {}
public:
ObString location_;
ObString pattern_;
sql::ObExprRegexpSessionVariables regexp_vars_;
TO_STRING_KV(K_(location));
};

View File

@ -22,6 +22,7 @@
#include "sql/engine/table/ob_external_table_access_service.h"
#include "sql/ob_sql_utils.h"
#include "sql/rewrite/ob_query_range.h"
#include "share/backup/ob_backup_io_adapter.h"
namespace oceanbase
{
@ -319,48 +320,44 @@ int ObExternalTableUtils::prepare_single_scan_range(const uint64_t tenant_id,
return ret;
}
int ObExternalTableUtils::filter_external_table_files(const ObString &pattern,
ObExecContext &exec_ctx,
ObIArray<ObString> &file_urls)
// Reports whether the filter is ready for matching. The answer is taken
// directly from the referenced regex context's own initialization state.
bool ObExternalPathFilter::is_inited()
{
  const bool regex_ready = regex_ctx_.is_inited();
  return regex_ready;
}
// Tests one path against the regex held by regex_ctx_.
//   path        - path text, converted below from CS_TYPE_UTF8MB4_BIN.
//   is_filtered - out: set to true when the path did NOT match the pattern.
//                 `match` starts as false, so a failed conversion or a failed
//                 match call also leaves is_filtered == true.
// Returns OB_SUCCESS, or the error code of the failing conversion/match call.
// NOTE(review): in this diff view the lines from `if (!pattern.empty()) {`
// through `ObArenaAllocator allocator;` look like removed-side lines of the
// old filter_external_table_files() body, not part of this function — confirm
// against the real file.
int ObExternalPathFilter::is_filtered(const ObString &path, bool &is_filtered)
{
int ret = OB_SUCCESS;
if (!pattern.empty()) {
const common::ObCollationType cs_type_pattern = CS_TYPE_UTF8MB4_BIN;
const common::ObCollationType cs_type_file = CS_TYPE_UTF8MB4_BIN;
const common::ObCollationType cs_type_match = CS_TYPE_UTF16_BIN;
ObExprRegexContext regex_ctx;
ObArenaAllocator allocator;
bool match = false;
ObString out_text;
// The path is re-encoded to CS_TYPE_UTF16_BIN before matching; both the
// converted text and the match call draw memory from temp_allocator_.
if (OB_FAIL(ObExprUtil::convert_string_collation(path,
CS_TYPE_UTF8MB4_BIN,
out_text,
CS_TYPE_UTF16_BIN,
temp_allocator_))) {
LOG_WARN("convert charset failed", K(ret));
} else if (OB_FAIL(regex_ctx_.match(temp_allocator_, out_text, 0, match))) {
LOG_WARN("regex match failed", K(ret));
}
// Written unconditionally, including on the error paths above.
is_filtered = !match;
// Presumably recycles the per-call scratch memory so repeated calls do not
// accumulate allocations — verify ObArenaAllocator::reuse() semantics.
temp_allocator_.reuse();
return ret;
}
int ObExternalPathFilter::init(const ObString &pattern,
const ObExprRegexpSessionVariables &regexp_vars)
{
int ret = OB_SUCCESS;
if (regex_ctx_.is_inited()) {
ret = OB_INIT_TWICE;
LOG_WARN("fail to init", K(ret));
} else {
uint32_t flags = 0;
ObString match_string;
ObSEArray<ObString, 8> tmp_file_urls;
if (OB_FAIL(ObExprRegexContext::get_regexp_flags(match_string, true, flags))) {
LOG_WARN("failed to get regexp flags", K(ret));
} else if (OB_FAIL(regex_ctx.init(exec_ctx.get_allocator(),
exec_ctx.get_my_session(),
pattern,
flags,
true,
cs_type_pattern))) {
} else if (OB_FAIL(regex_ctx_.init(allocator_, regexp_vars,
pattern, flags, true, CS_TYPE_UTF8MB4_BIN))) {
LOG_WARN("init regex context failed", K(ret), K(pattern));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); ++i) {
bool match = false;
ObString out_text;
if (OB_FAIL(ObExprUtil::convert_string_collation(file_urls.at(i),
cs_type_file,
out_text,
cs_type_match,
allocator))) {
LOG_WARN("convert charset failed", K(ret));
} else if (OB_FAIL(regex_ctx.match(allocator, out_text, 0, match))) {
LOG_WARN("regex match failed", K(ret));
} else if (match && OB_FAIL(tmp_file_urls.push_back(file_urls.at(i)))) {
LOG_WARN("failed to push back into tmp_file_urls", K(ret));
}
}
if (OB_SUCC(ret) && OB_FAIL(file_urls.assign(tmp_file_urls))) {
LOG_WARN("failed to assign file_urls", K(ret));
}
}
}
return ret;

View File

@ -14,6 +14,8 @@
#define _OB_EXTERNAL_TABLE_UTILS_H_
#include "lib/container/ob_iarray.h"
#include "lib/string/ob_string.h"
#include "lib/allocator/page_arena.h"
namespace oceanbase
{
@ -30,10 +32,24 @@ class ObDASTabletLoc;
class ObExecContext;
class ObExternalTableAccessService;
class ObQueryRange;
class ObExprRegexContext;
class ObExprRegexpSessionVariables;
}
namespace share
{
// Regex-based filter for external file paths.
//
// The filter does not own the regex context or the allocator passed to the
// constructor; it only stores references to them, so both must outlive the
// filter object.
struct ObExternalPathFilter {
  ObExternalPathFilter(sql::ObExprRegexContext &regex_ctx, common::ObIAllocator &allocator)
      : regex_ctx_(regex_ctx),
        allocator_(allocator)
  {
  }

  // True once the referenced regex context has been initialized.
  bool is_inited();

  // Prepares regex_ctx_ for `pattern` using the given regexp session variables.
  int init(const common::ObString &pattern, const sql::ObExprRegexpSessionVariables &regexp_vars);

  // Matches `path` against the pattern; `is_filtered` is the out-parameter
  // carrying the verdict.
  int is_filtered(const common::ObString &path, bool &is_filtered);

  // Data members: declaration order is kept as-is because it determines
  // initialization order.
  sql::ObExprRegexContext &regex_ctx_;      // borrowed regex context
  common::ObIAllocator &allocator_;         // borrowed allocator
  common::ObArenaAllocator temp_allocator_; // arena owned by the filter itself
};
class ObExternalTableUtils {
public:
enum ExternalTableRangeColumn {
@ -78,9 +94,6 @@ class ObExternalTableUtils {
common::ObIArray<common::ObNewRange *> &new_range,
bool is_file_on_disk);
static int filter_external_table_files(const common::ObString &pattern,
sql::ObExecContext &exec_ctx,
common::ObIArray<common::ObString> &file_urls);
static int calc_assigned_files_to_sqcs(
const common::ObIArray<ObExternalFileInfo> &files,
common::ObIArray<int64_t> &assigned_idx,