Refine filtering external files by pattern option
This commit is contained in:
@ -41,6 +41,7 @@ namespace oceanbase
|
||||
{
|
||||
using namespace observer;
|
||||
using namespace common;
|
||||
using namespace sql;
|
||||
using namespace transaction::tablelock;
|
||||
namespace share
|
||||
{
|
||||
@ -243,17 +244,20 @@ int ObExternalTableFileManager::update_inner_table_file_list(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExternalTableFileManager::get_external_file_list_on_device(const ObString &location,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes,
|
||||
const ObString &access_info,
|
||||
ObIAllocator &allocator)
|
||||
int ObExternalTableFileManager::get_external_file_list_on_device(
|
||||
const ObString &location,
|
||||
const ObString &pattern,
|
||||
const ObExprRegexpSessionVariables &regexp_vars,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes,
|
||||
const ObString &access_info,
|
||||
ObIAllocator &allocator)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
sql::ObExternalDataAccessDriver driver;
|
||||
if (OB_FAIL(driver.init(location, access_info))) {
|
||||
LOG_WARN("init external data access driver failed", K(ret));
|
||||
} else if (OB_FAIL(driver.get_file_list(location, file_urls, allocator))) {
|
||||
} else if (OB_FAIL(driver.get_file_list(location, pattern, regexp_vars, file_urls, allocator))) {
|
||||
LOG_WARN("get file urls failed", K(ret));
|
||||
} else if (OB_FAIL(driver.get_file_sizes(location, file_urls, file_sizes))) {
|
||||
LOG_WARN("get file sizes failed", K(ret));
|
||||
|
||||
@ -17,6 +17,9 @@
|
||||
#include "observer/ob_server_struct.h"
|
||||
|
||||
namespace oceanbase {
|
||||
namespace sql {
|
||||
class ObExprRegexpSessionVariables;
|
||||
}
|
||||
|
||||
namespace share {
|
||||
|
||||
@ -124,11 +127,13 @@ public:
|
||||
const uint64_t table_id,
|
||||
ObMySQLTransaction &trans);
|
||||
|
||||
int get_external_file_list_on_device(const ObString &location,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes,
|
||||
const ObString &access_info,
|
||||
ObIAllocator &allocator);
|
||||
int get_external_file_list_on_device(const common::ObString &location,
|
||||
const common::ObString &pattern,
|
||||
const sql::ObExprRegexpSessionVariables &regexp_vars,
|
||||
common::ObIArray<common::ObString> &file_urls,
|
||||
common::ObIArray<int64_t> &file_sizes,
|
||||
const common::ObString &access_info,
|
||||
common::ObIAllocator &allocator);
|
||||
|
||||
private:
|
||||
|
||||
|
||||
@ -39,11 +39,12 @@ int ObAsyncLoadExternalTableFileListP::process()
|
||||
int ret = OB_SUCCESS;
|
||||
ObLoadExternalFileListReq &req = arg_;
|
||||
ObLoadExternalFileListRes &res = result_;
|
||||
ObString &location = req.location_;
|
||||
ObSEArray<ObString, 16> file_urls;
|
||||
ObString access_info;
|
||||
ObArenaAllocator allocator;
|
||||
if (OB_FAIL(ObExternalTableFileManager::get_instance().get_external_file_list_on_device(location,
|
||||
if (OB_FAIL(ObExternalTableFileManager::get_instance().get_external_file_list_on_device(req.location_,
|
||||
req.pattern_,
|
||||
req.regexp_vars_,
|
||||
file_urls,
|
||||
res.file_sizes_,
|
||||
access_info,
|
||||
@ -56,7 +57,7 @@ int ObAsyncLoadExternalTableFileListP::process()
|
||||
OZ(res.file_urls_.push_back(tmp));
|
||||
}
|
||||
res.rcode_.rcode_ = ret;
|
||||
LOG_DEBUG("get external table file", K(ret), K(location), K(file_urls), K(res.file_urls_));
|
||||
LOG_DEBUG("get external table file", K(ret), K(req.location_), K(req.pattern_), K(file_urls), K(res.file_urls_));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -23,7 +23,7 @@ OB_SERIALIZE_MEMBER(ObFlushExternalTableFileCacheReq, tenant_id_, table_id_, par
|
||||
|
||||
OB_SERIALIZE_MEMBER(ObFlushExternalTableFileCacheRes, rcode_);
|
||||
|
||||
OB_SERIALIZE_MEMBER(ObLoadExternalFileListReq, location_);
|
||||
OB_SERIALIZE_MEMBER(ObLoadExternalFileListReq, location_, pattern_, regexp_vars_);
|
||||
|
||||
OB_DEF_SERIALIZE(ObLoadExternalFileListRes)
|
||||
{
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#define OBDEV_SRC_EXTERNAL_TABLE_FILE_TASK_H_
|
||||
#include "rpc/obrpc/ob_rpc_result_code.h"
|
||||
#include "deps/oblib/src/lib/lock/ob_thread_cond.h"
|
||||
#include "sql/engine/expr/ob_expr_regexp_context.h"
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace share
|
||||
@ -47,9 +48,11 @@ class ObLoadExternalFileListReq
|
||||
OB_UNIS_VERSION(1);
|
||||
public:
|
||||
ObLoadExternalFileListReq() :
|
||||
location_() {}
|
||||
location_(), pattern_() {}
|
||||
public:
|
||||
ObString location_;
|
||||
ObString pattern_;
|
||||
sql::ObExprRegexpSessionVariables regexp_vars_;
|
||||
TO_STRING_KV(K_(location));
|
||||
};
|
||||
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "sql/engine/table/ob_external_table_access_service.h"
|
||||
#include "sql/ob_sql_utils.h"
|
||||
#include "sql/rewrite/ob_query_range.h"
|
||||
#include "share/backup/ob_backup_io_adapter.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -319,48 +320,44 @@ int ObExternalTableUtils::prepare_single_scan_range(const uint64_t tenant_id,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExternalTableUtils::filter_external_table_files(const ObString &pattern,
|
||||
ObExecContext &exec_ctx,
|
||||
ObIArray<ObString> &file_urls)
|
||||
bool ObExternalPathFilter::is_inited() {
|
||||
return regex_ctx_.is_inited();
|
||||
}
|
||||
|
||||
int ObExternalPathFilter::is_filtered(const ObString &path, bool &is_filtered)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (!pattern.empty()) {
|
||||
const common::ObCollationType cs_type_pattern = CS_TYPE_UTF8MB4_BIN;
|
||||
const common::ObCollationType cs_type_file = CS_TYPE_UTF8MB4_BIN;
|
||||
const common::ObCollationType cs_type_match = CS_TYPE_UTF16_BIN;
|
||||
ObExprRegexContext regex_ctx;
|
||||
ObArenaAllocator allocator;
|
||||
bool match = false;
|
||||
ObString out_text;
|
||||
if (OB_FAIL(ObExprUtil::convert_string_collation(path,
|
||||
CS_TYPE_UTF8MB4_BIN,
|
||||
out_text,
|
||||
CS_TYPE_UTF16_BIN,
|
||||
temp_allocator_))) {
|
||||
LOG_WARN("convert charset failed", K(ret));
|
||||
} else if (OB_FAIL(regex_ctx_.match(temp_allocator_, out_text, 0, match))) {
|
||||
LOG_WARN("regex match failed", K(ret));
|
||||
}
|
||||
is_filtered = !match;
|
||||
temp_allocator_.reuse();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExternalPathFilter::init(const ObString &pattern,
|
||||
const ObExprRegexpSessionVariables &regexp_vars)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (regex_ctx_.is_inited()) {
|
||||
ret = OB_INIT_TWICE;
|
||||
LOG_WARN("fail to init", K(ret));
|
||||
} else {
|
||||
uint32_t flags = 0;
|
||||
ObString match_string;
|
||||
ObSEArray<ObString, 8> tmp_file_urls;
|
||||
if (OB_FAIL(ObExprRegexContext::get_regexp_flags(match_string, true, flags))) {
|
||||
LOG_WARN("failed to get regexp flags", K(ret));
|
||||
} else if (OB_FAIL(regex_ctx.init(exec_ctx.get_allocator(),
|
||||
exec_ctx.get_my_session(),
|
||||
pattern,
|
||||
flags,
|
||||
true,
|
||||
cs_type_pattern))) {
|
||||
} else if (OB_FAIL(regex_ctx_.init(allocator_, regexp_vars,
|
||||
pattern, flags, true, CS_TYPE_UTF8MB4_BIN))) {
|
||||
LOG_WARN("init regex context failed", K(ret), K(pattern));
|
||||
} else {
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); ++i) {
|
||||
bool match = false;
|
||||
ObString out_text;
|
||||
if (OB_FAIL(ObExprUtil::convert_string_collation(file_urls.at(i),
|
||||
cs_type_file,
|
||||
out_text,
|
||||
cs_type_match,
|
||||
allocator))) {
|
||||
LOG_WARN("convert charset failed", K(ret));
|
||||
} else if (OB_FAIL(regex_ctx.match(allocator, out_text, 0, match))) {
|
||||
LOG_WARN("regex match failed", K(ret));
|
||||
} else if (match && OB_FAIL(tmp_file_urls.push_back(file_urls.at(i)))) {
|
||||
LOG_WARN("failed to push back into tmp_file_urls", K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret) && OB_FAIL(file_urls.assign(tmp_file_urls))) {
|
||||
LOG_WARN("failed to assign file_urls", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
@ -14,6 +14,8 @@
|
||||
#define _OB_EXTERNAL_TABLE_UTILS_H_
|
||||
|
||||
#include "lib/container/ob_iarray.h"
|
||||
#include "lib/string/ob_string.h"
|
||||
#include "lib/allocator/page_arena.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -30,10 +32,24 @@ class ObDASTabletLoc;
|
||||
class ObExecContext;
|
||||
class ObExternalTableAccessService;
|
||||
class ObQueryRange;
|
||||
class ObExprRegexContext;
|
||||
class ObExprRegexpSessionVariables;
|
||||
}
|
||||
|
||||
namespace share
|
||||
{
|
||||
|
||||
struct ObExternalPathFilter {
|
||||
ObExternalPathFilter(sql::ObExprRegexContext ®ex_ctx, common::ObIAllocator &allocator)
|
||||
: regex_ctx_(regex_ctx), allocator_(allocator) {}
|
||||
int init(const common::ObString &pattern, const sql::ObExprRegexpSessionVariables ®exp_vars);
|
||||
bool is_inited();
|
||||
int is_filtered(const common::ObString &path, bool &is_filtered);
|
||||
sql::ObExprRegexContext ®ex_ctx_;
|
||||
common::ObIAllocator &allocator_;
|
||||
common::ObArenaAllocator temp_allocator_;
|
||||
};
|
||||
|
||||
class ObExternalTableUtils {
|
||||
public:
|
||||
enum ExternalTableRangeColumn {
|
||||
@ -78,9 +94,6 @@ class ObExternalTableUtils {
|
||||
common::ObIArray<common::ObNewRange *> &new_range,
|
||||
bool is_file_on_disk);
|
||||
|
||||
static int filter_external_table_files(const common::ObString &pattern,
|
||||
sql::ObExecContext &exec_ctx,
|
||||
common::ObIArray<common::ObString> &file_urls);
|
||||
static int calc_assigned_files_to_sqcs(
|
||||
const common::ObIArray<ObExternalFileInfo> &files,
|
||||
common::ObIArray<int64_t> &assigned_idx,
|
||||
|
||||
Reference in New Issue
Block a user