[FEAT MERGE] Support external table

Co-authored-by: jingtaoye35 <1255153887@qq.com>
This commit is contained in:
wjhh2008
2023-04-29 15:11:49 +00:00
committed by ob-robot
parent ecb74a122c
commit 09ed904b58
164 changed files with 8074 additions and 667 deletions

View File

@ -0,0 +1,512 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SHARE
#include "lib/oblog/ob_log.h"
#include "lib/oblog/ob_log_module.h"
#include "lib/string/ob_sql_string.h"
#include "lib/mysqlclient/ob_mysql_proxy.h"
#include "lib/mysqlclient/ob_mysql_transaction.h"
#include "lib/mysqlclient/ob_mysql_result.h"
#include "lib/mysqlclient/ob_mysql_connection.h"
#include "lib/mysqlclient/ob_mysql_statement.h"
#include "lib/mysqlclient/ob_mysql_connection_pool.h"
#include "lib/utility/ob_print_utils.h"
#include "lib/compress/ob_compressor_pool.h"
#include "share/ob_dml_sql_splicer.h"
#include "share/config/ob_server_config.h"
#include "share/schema/ob_schema_utils.h"
#include "share/schema/ob_schema_service.h"
#include "share/inner_table/ob_inner_table_schema_constants.h"
#include "observer/ob_sql_client_decorator.h"
#include "observer/ob_server_struct.h"
#include "lib/charset/ob_charset.h"
#include "share/schema/ob_schema_service_sql_impl.h"
#include "ob_external_table_file_mgr.h"
#include "storage/tablelock/ob_table_lock_service.h"
#include "observer/ob_inner_sql_connection.h"
#include "sql/engine/table/ob_external_table_access_service.h"
#include "share/external_table/ob_external_table_utils.h"
namespace oceanbase
{
using namespace observer;
using namespace common;
using namespace transaction::tablelock;
namespace share
{
int ObExternalTableFilesKey::deep_copy(char *buf, const int64_t buf_len, ObIKVCacheKey *&key) const
{
int ret = OB_SUCCESS;
ObExternalTableFilesKey *new_value = NULL;
ObDataBuffer allocator(buf, buf_len);
if (OB_ISNULL(new_value = OB_NEWx(ObExternalTableFilesKey, &allocator))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to allocate memory", K(ret));
} else {
new_value->tenant_id_ = this->tenant_id_;
new_value->table_id_ = this->table_id_;
new_value->partition_id_ = this->partition_id_;
key = new_value;
}
return ret;
}
// Number of bytes needed to deep-copy this value: the object itself, one
// ObString header and one int64_t id per entry, plus the url payload bytes.
int64_t ObExternalTableFiles::size() const
{
  const int url_cnt = file_urls_.count();
  int64_t total_bytes = sizeof(*this);
  total_bytes += sizeof(ObString) * file_urls_.count();
  total_bytes += sizeof(int64_t) * file_ids_.count();
  int idx = 0;
  while (idx < url_cnt) {
    total_bytes += file_urls_.at(idx).length();
    ++idx;
  }
  return total_bytes;
}
// Deep-copies this value (url strings, file ids, create timestamp) into the
// caller-provided buffer [buf, buf + buf_len). `value` is always set to the
// object constructed inside `buf` (NULL when even that allocation failed);
// callers must consult the return code before using it.
int ObExternalTableFiles::deep_copy(char *buf, const int64_t buf_len, ObIKVCacheValue *&value) const
{
  int ret = OB_SUCCESS;
  ObDataBuffer allocator(buf, buf_len);
  ObExternalTableFiles *new_value = OB_NEWx(ObExternalTableFiles, &allocator);
  if (NULL == new_value) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("fail to allocate memory", K(ret));
  } else if (this->file_urls_.count() > 0) {
    // Reserve the ObString slots first, then copy each url payload behind them.
    if (OB_FAIL(new_value->file_urls_.allocate_array(allocator, this->file_urls_.count()))) {
      LOG_WARN("fail to allocate array", K(ret));
    }
    for (int i = 0; OB_SUCC(ret) && i < this->file_urls_.count(); i++) {
      OZ (ob_write_string(allocator, this->file_urls_.at(i), new_value->file_urls_.at(i)));
    }
  }
  if (OB_SUCC(ret) && this->file_ids_.count() > 0) {
    if (OB_SUCC(new_value->file_ids_.allocate_array(allocator, this->file_ids_.count()))) {
      // Ids are plain integers, so a raw byte copy is sufficient.
      MEMCPY(new_value->file_ids_.get_data(), this->file_ids_.get_data(),
             sizeof(int64_t) * this->file_ids_.count());
    } else {
      LOG_WARN("fail to allocate array", K(ret));
    }
  }
  if (OB_SUCC(ret)) {
    new_value->create_ts_ = this->create_ts_;
  }
  value = new_value;
  return ret;
}
// Drops the cached file list of (tenant_id, table_id, partition 0).
// A missing cache entry is not an error.
int ObExternalTableFileManager::flush_cache(const uint64_t tenant_id, const uint64_t table_id)
{
  int ret = OB_SUCCESS;
  ObExternalTableFilesKey key;
  key.partition_id_ = 0;
  key.table_id_ = table_id;
  key.tenant_id_ = tenant_id;
  ret = kv_cache_.erase(key);
  if (OB_ENTRY_NOT_EXIST == ret) {
    // Nothing was cached for this table; treat as success.
    ret = OB_SUCCESS;
  } else if (OB_SUCCESS != ret) {
    LOG_WARN("fail to erase value", K(ret), K(key));
  }
  return ret;
}
// Deletes every row of `table_id` with PART_ID 0 from the external table file
// inner table, executing the DELETE through the caller-supplied transaction.
// This function neither commits nor rolls back `trans`.
// Returns the first error hit while building or executing the statement.
int ObExternalTableFileManager::clear_inner_table_files(
const uint64_t tenant_id,
const uint64_t table_id,
ObMySQLTransaction &trans)
{
int ret = OB_SUCCESS;
ObSqlString delete_sql;
int64_t affected_rows = 0;
// NOTE(review): the PART_ID placeholder is %lu but the argument is the signed
// literal 0L; harmless for the value 0, but the types do not match.
OZ (delete_sql.assign_fmt("DELETE FROM %s WHERE TABLE_ID = %lu AND PART_ID = %lu",
OB_ALL_EXTERNAL_TABLE_FILE_TNAME, table_id, 0L));
OZ (trans.write(tenant_id, delete_sql.ptr(), affected_rows));
LOG_DEBUG("check clear rows", K(affected_rows));
return ret;
}
// Initializes the KV cache backing this manager under the cache name
// "external_table_file_cache". Returns the result of the cache init call.
int ObExternalTableFileManager::init()
{
int ret = OB_SUCCESS;
OZ (kv_cache_.init("external_table_file_cache"));
return ret;
}
// Returns the single manager instance, held in a function-local static.
// The instance is only default-constructed here; init() is not called by this accessor.
ObExternalTableFileManager &ObExternalTableFileManager::get_instance()
{
static ObExternalTableFileManager instance_;
return instance_;
}
int ObExternalTableFileManager::get_external_files(
const uint64_t tenant_id,
const uint64_t table_id,
const bool is_local_file_on_disk,
ObIAllocator &allocator,
ObIArray<ObExternalFileInfo> &external_files,
ObIArray<ObNewRange *> *range_filter /*default = NULL*/)
{
return get_external_files_by_part_id(tenant_id, table_id, 0UL, is_local_file_on_disk, allocator, external_files, range_filter);
}
// Collects the files of (tenant_id, table_id, partition_id) into `external_files`.
//
// The list is served from the KV cache; a missing or expired entry is reloaded
// through fill_cache_from_inner_table(). When `range_filter` is non-NULL only
// files whose id falls into one of the ranges are returned. When
// `is_local_file_on_disk` is true, the text before the first '%' of each cached
// url is parsed into file_addr_ and only the remainder is stored as file_url_.
// Url strings in the output are copied with `allocator`.
int ObExternalTableFileManager::get_external_files_by_part_id(
    const uint64_t tenant_id,
    const uint64_t table_id,
    const uint64_t partition_id,
    const bool is_local_file_on_disk,
    ObIAllocator &allocator,
    ObIArray<ObExternalFileInfo> &external_files,
    ObIArray<ObNewRange *> *range_filter /*default = NULL*/)
{
  int ret = OB_SUCCESS;
  ObKVCacheHandle handle;
  const ObExternalTableFiles *ext_files = NULL;
  ObExternalTableFilesKey key;
  key.tenant_id_ = tenant_id;
  key.table_id_ = table_id;
  key.partition_id_ = partition_id;

  // Decide whether the cache entry has to be (re)loaded.
  bool need_reload = false;
  if (OB_SUCC(kv_cache_.get(key, ext_files, handle))) {
    need_reload = is_cache_value_timeout(*ext_files);
  } else if (OB_ENTRY_NOT_EXIST == ret) {
    need_reload = true;
  } else {
    LOG_WARN("fail to get from KVCache", K(ret), K(key));
  }
  if (need_reload) {
    if (OB_FAIL(fill_cache_from_inner_table(key, ext_files, handle))) {
      LOG_WARN("fail to fill cache from inner table", K(ret));
    }
  }

  for (int i = 0; OB_SUCC(ret) && i < ext_files->file_urls_.count(); ++i) {
    bool in_ranges = false;
    if (NULL != range_filter) {
      if (OB_FAIL(ObExternalTableUtils::is_file_id_in_ranges(*range_filter,
                                                             ext_files->file_ids_.at(i),
                                                             in_ranges))) {
        LOG_WARN("failed to judge file id in ranges", K(ret));
      }
    }
    if (OB_SUCC(ret) && (NULL == range_filter || in_ranges)) {
      ObExternalFileInfo file_info;
      // Shallow copy: split_on() below only moves this local view forward.
      ObString file_url = ext_files->file_urls_.at(i);
      file_info.file_id_ = ext_files->file_ids_.at(i);
      if (is_local_file_on_disk) {
        ObString ip_port = file_url.split_on('%');
        OZ (file_info.file_addr_.parse_from_string(ip_port));
      }
      OZ (ob_write_string(allocator, file_url, file_info.file_url_));
      OZ (external_files.push_back(file_info));
    }
  }
  LOG_DEBUG("get external file list result", K(table_id), K(external_files));
  return ret;
}
// Replaces the recorded file list of `table_id` (partition 0) with `file_urls`
// / `file_sizes` inside one transaction on GCTX.sql_proxy_.
// Steps: start transaction, take the refresh lock, reconcile the inner table,
// commit. If any step failed the transaction is still started afterwards and
// is rolled back.
int ObExternalTableFileManager::update_inner_table_file_list(
const uint64_t tenant_id,
const uint64_t table_id,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes)
{
int ret = OB_SUCCESS;
ObMySQLTransaction trans;
OZ (trans.start(GCTX.sql_proxy_, tenant_id));
OZ (lock_for_refresh(trans, tenant_id, table_id));
OZ (update_inner_table_files_list_one_part(tenant_id, table_id, 0, trans, file_urls, file_sizes));
OZ (trans.end(true));
// Reached with the transaction still open only when a step above failed
// (or the commit itself was skipped); the rollback result is ignored so the
// original error in ret is what the caller sees.
if (trans.is_started()) {
trans.end(false);
}
return ret;
}
int ObExternalTableFileManager::get_external_file_list_on_device(const ObString &location,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes,
const ObString &access_info,
ObIAllocator &allocator)
{
int ret = OB_SUCCESS;
sql::ObExternalDataAccessDriver driver;
if (OB_FAIL(driver.init(location, access_info))) {
LOG_WARN("init external data access driver failed", K(ret));
} else if (OB_FAIL(driver.get_file_list(location, file_urls, allocator))) {
LOG_WARN("get file urls failed", K(ret));
} else if (OB_FAIL(driver.get_file_sizes(location, file_urls, file_sizes))) {
LOG_WARN("get file sizes failed", K(ret));
}
if (driver.is_opened()) {
driver.close();
}
LOG_DEBUG("show external table files", K(file_urls), K(access_info));
return ret;
}
int ObExternalTableFileManager::update_inner_table_files_list_one_part(
const uint64_t tenant_id,
const uint64_t table_id,
const uint64_t partition_id,
ObMySQLTransaction &trans,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes)
{
int ret = OB_SUCCESS;
int64_t cur_time = ObTimeUtil::current_time();
ObSEArray<ObString, 16> old_file_urls;
ObSEArray<int64_t, 16> old_file_ids;
ObSEArray<ObString, 16> insert_file_urls;
ObSEArray<int64_t, 16> insert_file_ids;
ObSEArray<int64_t, 16> insert_file_sizes;
ObSEArray<ObString, 16> update_file_urls;
ObSEArray<int64_t, 16> update_file_sizes;
ObSEArray<int64_t, 16> update_file_ids;
ObSEArray<ObString, 16> delete_file_urls;
ObSEArray<int64_t, 16> delete_file_ids;
ObArenaAllocator allocator;
ObSqlString update_sql;
ObSqlString insert_sql;
ObSqlString delete_sql;
int64_t update_rows = 0;
int64_t insert_rows = 0;
int64_t max_file_id = 0;// ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_FILE_ID - 1
common::hash::ObHashMap<ObString, int64_t> hash_map;
OZ(hash_map.create(std::max(file_urls.count(), old_file_urls.count()) + 1, "ExternalFile"));
OZ(get_all_records_from_inner_table(allocator, tenant_id, table_id, partition_id, old_file_urls, old_file_ids));
for (int64_t i = 0; OB_SUCC(ret) && i < old_file_urls.count(); i++) {
OZ(hash_map.set_refactored(old_file_urls.at(i), old_file_ids.at(i)));
max_file_id = old_file_ids.at(i) > max_file_id ? old_file_ids.at(i) : max_file_id;
}
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); i++) {
int64_t file_id = 0;
OZ(hash_map.get_refactored(file_urls.at(i), file_id));
if (ret == OB_HASH_NOT_EXIST) {
ret = OB_SUCCESS;
OZ(insert_file_urls.push_back(file_urls.at(i)));
OZ(insert_file_sizes.push_back(file_sizes.at(i)));
OZ(insert_file_ids.push_back(++max_file_id));
} else if (ret == OB_SUCCESS) {
OZ(update_file_urls.push_back(file_urls.at(i)));
OZ(update_file_sizes.push_back(file_sizes.at(i)));
OZ(update_file_ids.push_back(file_id));
}
}
OZ(hash_map.reuse());
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); i++) {
OZ(hash_map.set_refactored(file_urls.at(i), 1));
}
for (int64_t i = 0; OB_SUCC(ret) && i < old_file_urls.count(); i++) {
int64_t existed = 0;
OZ(hash_map.get_refactored(old_file_urls.at(i), existed));
if (ret == OB_HASH_NOT_EXIST) {
ret = OB_SUCCESS;
OZ(delete_file_urls.push_back(old_file_urls.at(i)));
OZ(delete_file_ids.push_back(old_file_ids.at(i)));
}
}
if (OB_SUCC(ret) && delete_file_urls.count() > 0) {
OZ(delete_sql.assign_fmt("UPDATE %s SET DELETE_VERSION = %ld WHERE (TABLE_ID, PART_ID, FILE_ID) IN (",
OB_ALL_EXTERNAL_TABLE_FILE_TNAME, cur_time));
for (int64_t i = 0; OB_SUCC(ret) && i < delete_file_urls.count(); i++) {
OZ(delete_sql.append_fmt("%c(%ld, %ld, %ld)", (0 == i) ? ' ' : ',', table_id, partition_id,
delete_file_ids.at(i)));
}
OZ(delete_sql.append(")"));
OZ(trans.write(tenant_id, delete_sql.ptr(), update_rows));
}
if (OB_SUCC(ret) && update_file_urls.count() > 0) {
for (int64_t i = 0; OB_SUCC(ret) && i < update_file_urls.count(); i++) {
OZ(update_sql.assign_fmt("UPDATE %s SET"
" CREATE_VERSION = CASE WHEN DELETE_VERSION != %ld THEN %ld ELSE CREATE_VERSION end,"
" DELETE_VERSION = %ld, FILE_SIZE = %ld WHERE TABLE_ID = %lu AND PART_ID = %lu AND FILE_ID=%ld",
OB_ALL_EXTERNAL_TABLE_FILE_TNAME,
MAX_VERSION, cur_time,
MAX_VERSION, update_file_sizes.at(i), table_id, partition_id,
update_file_ids.at(i)));
OZ (trans.write(tenant_id, update_sql.ptr(), update_rows));
}
}
if (OB_SUCC(ret) && insert_file_urls.count() > 0) {
OZ(insert_sql.assign_fmt("INSERT INTO %s(TABLE_ID,PART_ID,FILE_ID,FILE_URL,CREATE_VERSION,DELETE_VERSION,FILE_SIZE) VALUES",
OB_ALL_EXTERNAL_TABLE_FILE_TNAME));
for (int64_t i = 0; OB_SUCC(ret) && i < insert_file_urls.count(); i++) {
OZ(insert_sql.append_fmt("%c(%lu,%lu,%ld,'%.*s',%ld,%ld,%ld)",
(0 == i) ? ' ' : ',', table_id, partition_id,
insert_file_ids.at(i),
insert_file_urls.at(i).length(), insert_file_urls.at(i).ptr(),
cur_time, MAX_VERSION, insert_file_sizes.at(i)));
}
OZ(trans.write(tenant_id, insert_sql.ptr(), insert_rows));
}
return ret;
}
// Reads every (file_url, file_id) row recorded for (table_id, partition_id)
// from the external table file inner table of `tenant_id`, including rows
// already marked deleted (no version filter is applied).
// Url strings appended to `file_urls` are copied with `allocator`;
// `file_ids` receives the matching ids at the same indexes.
// Returns OB_SUCCESS when the result set was consumed completely, otherwise
// the first error encountered.
int ObExternalTableFileManager::get_all_records_from_inner_table(ObIAllocator &allocator,
                                                                 int64_t tenant_id,
                                                                 int64_t table_id,
                                                                 int64_t partition_id,
                                                                 ObIArray<ObString> &file_urls,
                                                                 ObIArray<int64_t> &file_ids)
{
  int ret = OB_SUCCESS;
  SMART_VAR(ObMySQLProxy::MySQLResult, res) {
    sqlclient::ObMySQLResult *result = NULL;
    ObSqlString sql;
    if (OB_ISNULL(GCTX.sql_proxy_)) {
      // Same guard as fill_cache_from_inner_table(): never dereference a
      // proxy that has not been set up.
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("sql proxy is null", K(ret));
    }
    OZ (sql.append_fmt("SELECT file_url, file_id FROM %s"
                       " WHERE table_id = %lu AND part_id = %lu",
                       OB_ALL_EXTERNAL_TABLE_FILE_TNAME, table_id, partition_id));
    OZ (GCTX.sql_proxy_->read(res, tenant_id, sql.ptr()));
    if (OB_SUCC(ret)) {
      if (OB_ISNULL(result = res.get_result())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("result is null", K(ret));
      } else {
        // Check ret BEFORE advancing: result->next() overwrites ret, so without
        // this guard a failure while extracting/copying a row would be replaced
        // by the outcome of the next fetch and silently dropped.
        while (OB_SUCC(ret) && OB_SUCC(result->next())) {
          ObString file_url;
          int64_t file_id = INT64_MAX;
          EXTRACT_VARCHAR_FIELD_MYSQL(*result, "file_url", file_url);
          EXTRACT_INT_FIELD_MYSQL(*result, "file_id", file_id, int64_t);
          ObString tmp_url;
          // The row buffer is only valid until the next fetch; keep a copy.
          OZ (ob_write_string(allocator, file_url, tmp_url));
          OZ (file_urls.push_back(tmp_url));
          OZ (file_ids.push_back(file_id));
        }
        if (OB_FAIL(ret) && OB_ITER_END != ret) {
          LOG_WARN("get next result failed", K(ret));
        } else {
          // OB_ITER_END is the normal end-of-result marker.
          ret = OB_SUCCESS;
        }
      }
    }
  }
  return ret;
}
// Loads the currently visible file list of `key` from the inner table into the
// KV cache and returns the cached value through `ext_files` / `handle`.
//
// Loads are serialized per lock bucket (key hash modulo LOAD_CACHE_LOCK_CNT).
// After the lock is obtained the cache is probed again, so a value that another
// worker inserted while this one was waiting is reused instead of reloaded.
// A row is visible when create_version <= now < delete_version.
int ObExternalTableFileManager::fill_cache_from_inner_table(
    const ObExternalTableFilesKey &key,
    const ObExternalTableFiles *&ext_files,
    ObKVCacheHandle &handle)
{
  int ret = OB_SUCCESS;
  //only one worker need do the job
  int64_t bucket_id = key.hash() % LOAD_CACHE_LOCK_CNT;
  int64_t total_wait_secs = 0;
  // Keep retrying the timed lock until it is taken, fails with something other
  // than OB_TIMEOUT, or the worker itself has timed out.
  while (OB_FAIL(fill_cache_locks_[bucket_id].lock(LOCK_TIMEOUT))
         && OB_TIMEOUT == ret && !THIS_WORKER.is_timeout()) {
    // Each failed attempt waited LOCK_TIMEOUT microseconds.
    total_wait_secs += LOCK_TIMEOUT / 1000000L;
    LOG_WARN("fill external table cache wait", K(total_wait_secs));
  }
  if (OB_SUCC(ret)) {
    //try fetch again
    if (OB_FAIL(kv_cache_.get(key, ext_files, handle))) {
      if (OB_ENTRY_NOT_EXIST != ret) {
        LOG_WARN("fail to get from KVCache", K(ret), K(key));
      }
    }
    if ((OB_SUCC(ret) && is_cache_value_timeout(*ext_files))
        || OB_ENTRY_NOT_EXIST == ret) {
      ret = OB_SUCCESS;
      SMART_VAR(ObMySQLProxy::MySQLResult, res) {
        sqlclient::ObMySQLResult *result = NULL;
        ObSqlString sql;
        int64_t cur_time = ObTimeUtil::current_time();
        if (OB_ISNULL(GCTX.sql_proxy_)) {
          ret = OB_ERR_UNEXPECTED;
        }
        OZ (sql.append_fmt("SELECT file_url, file_id FROM %s"
                           " WHERE table_id = %lu AND part_id = %lu"
                           " AND create_version <=%ld AND %ld < delete_version",
                           OB_ALL_EXTERNAL_TABLE_FILE_TNAME, key.table_id_, key.partition_id_,
                           cur_time, cur_time));
        OZ (GCTX.sql_proxy_->read(res, key.tenant_id_, sql.ptr()));
        if (OB_SUCC(ret)) {
          if (OB_ISNULL(result = res.get_result())) {
            ret = OB_ERR_UNEXPECTED;
            LOG_WARN("result is null", K(ret));
          } else {
            ObSEArray<ObString, 16> temp_file_urls;
            ObSEArray<int64_t, 16> temp_file_ids;
            ObArenaAllocator allocator;
            // Check ret BEFORE advancing: result->next() overwrites ret, so
            // without this guard a row-level failure would be replaced by the
            // outcome of the next fetch and an incomplete list would be cached.
            while (OB_SUCC(ret) && OB_SUCC(result->next())) {
              ObString file_url;
              ObString tmp_url;
              int64_t file_id = INT64_MAX;
              EXTRACT_VARCHAR_FIELD_MYSQL(*result, "file_url", tmp_url);
              EXTRACT_INT_FIELD_MYSQL(*result, "file_id", file_id, int64_t);
              OZ (ob_write_string(allocator, tmp_url, file_url));
              OZ (temp_file_urls.push_back(file_url));
              OZ (temp_file_ids.push_back(file_id));
            }
            if (OB_FAIL(ret) && OB_ITER_END != ret) {
              LOG_WARN("get next result failed", K(ret));
            } else {
              // OB_ITER_END is the normal end-of-result marker.
              ret = OB_SUCCESS;
            }
            if (OB_SUCC(ret)) {
              // The temporary value only wraps the local arrays;
              // put_and_fetch() hands back the cache's own value in ext_files.
              ObExternalTableFiles temp_ext_files;
              temp_ext_files.create_ts_ = cur_time;
              temp_ext_files.file_urls_ = ObArrayWrap<ObString>(temp_file_urls.get_data(), temp_file_urls.count());
              temp_ext_files.file_ids_ = ObArrayWrap<int64_t>(temp_file_ids.get_data(), temp_file_ids.count());
              OZ (kv_cache_.put_and_fetch(key, temp_ext_files, ext_files, handle, true));
            }
          }
        }
      }
      LOG_TRACE("external table fill cache", K(ext_files), K(key));
    }
  }
  // Release the bucket lock only if this thread actually holds it.
  if (fill_cache_locks_[bucket_id].self_locked()) {
    fill_cache_locks_[bucket_id].unlock();
  }
  return ret;
}
// Takes an EXCLUSIVE in-transaction object lock of type
// OBJ_TYPE_EXTERNAL_TABLE_REFRESH on `object_id`, through the inner SQL
// connection of `trans`. Each attempt uses a 2s lock timeout and attempts are
// repeated until one succeeds or the current worker has timed out.
int ObExternalTableFileManager::lock_for_refresh(
    ObMySQLTransaction &trans,
    const uint64_t tenant_id,
    const uint64_t object_id)
{
  int ret = OB_SUCCESS;
  ObInnerSQLConnection *conn = dynamic_cast<ObInnerSQLConnection *>(trans.get_connection());
  if (NULL != conn) {
    ObLockObjRequest lock_arg;
    lock_arg.obj_type_ = ObLockOBJType::OBJ_TYPE_EXTERNAL_TABLE_REFRESH;
    lock_arg.obj_id_ = object_id;
    lock_arg.owner_id_ = ObTableLockOwnerID(get_tid_cache());
    lock_arg.lock_mode_ = EXCLUSIVE;
    lock_arg.op_type_ = ObTableLockOpType::IN_TRANS_COMMON_LOCK;
    lock_arg.timeout_us_ = 1000L * 1000L * 2; //2s
    bool keep_trying = true;
    while (keep_trying) {
      if (OB_SUCC(conn->lock_obj(tenant_id, lock_arg)) || THIS_WORKER.is_timeout()) {
        keep_trying = false;
      } else {
        LOG_WARN("lock failed try again", K(ret));
      }
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("conn_ is NULL", KR(ret));
  }
  return ret;
}
// Serialization of ObExternalFileInfo covers file_url_, file_id_ and
// file_addr_, in this order. NOTE(review): the macro is defined outside this
// file; presumably the order is part of the wire format, so do not reorder.
OB_SERIALIZE_MEMBER(ObExternalFileInfo, file_url_, file_id_, file_addr_);
}
}

View File

@ -0,0 +1,161 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef _OB_EXTERNAL_TABLE_FILE_MANAGER_H_
#define _OB_EXTERNAL_TABLE_FILE_MANAGER_H_
#include "share/ob_rpc_struct.h"
#include "observer/ob_server_struct.h"
namespace oceanbase {
namespace share {
// Describes one file of an external table as handed to readers.
struct ObExternalFileInfo {
// file_id_ starts as INT64_MAX until a real id is assigned.
ObExternalFileInfo() : file_id_(INT64_MAX) {}
// Url of the file; the struct does not manage the string's memory itself.
common::ObString file_url_;
// Id of the file as recorded in the external table file inner table.
int64_t file_id_;
// Server address parsed from the url prefix; only filled by the file manager
// when the caller asked for local-disk files.
common::ObAddr file_addr_;
TO_STRING_KV(K_(file_url), K_(file_id), K_(file_addr));
OB_UNIS_VERSION(1);
};
class ObExternalTableFilesKey : public ObIKVCacheKey
{
public:
ObExternalTableFilesKey() : tenant_id_(OB_INVALID_ID),
table_id_(OB_INVALID_ID),
partition_id_(OB_INVALID_ID)
{}
virtual ~ObExternalTableFilesKey() {}
bool operator ==(const ObIKVCacheKey &other) const override {
const ObExternalTableFilesKey &other_key = reinterpret_cast<const ObExternalTableFilesKey&>(other);
return this->tenant_id_ == other_key.tenant_id_
&& this->table_id_ == other_key.table_id_
&& this->partition_id_ == other_key.partition_id_;
}
uint64_t hash() const override {
return common::murmurhash(this, sizeof(ObExternalTableFilesKey), 0);
}
uint64_t get_tenant_id() const override { return tenant_id_; }
int64_t size() const override { return sizeof(*this); }
int deep_copy(char *buf, const int64_t buf_len, ObIKVCacheKey *&key) const override;
TO_STRING_KV(K(tenant_id_), K(table_id_), K(partition_id_));
public:
uint64_t tenant_id_;
uint64_t table_id_;
uint64_t partition_id_;
};
// KV-cache value: the file list of one external table partition.
// file_urls_ and file_ids_ are parallel arrays (entry i of each describes the
// same file); the manager indexes them with a shared index.
class ObExternalTableFiles : public ObIKVCacheValue
{
public:
ObExternalTableFiles() : create_ts_(0) {}
virtual ~ObExternalTableFiles() {}
// Bytes required to deep-copy this value, including url payloads.
int64_t size() const override;
// Copies this value, including url payloads, into [buf, buf + buf_len).
int deep_copy(char *buf, const int64_t buf_len, ObIKVCacheValue *&value) const override;
TO_STRING_KV(K(file_urls_), K(file_ids_), K(create_ts_));
public:
ObArrayWrap<ObString> file_urls_;
ObArrayWrap<int64_t> file_ids_;
// Time the list was loaded; compared against the current time to decide
// whether the cached value has expired.
int64_t create_ts_;
};
// Manages the file lists of external tables: persists them in an inner table
// and serves them to readers through a KV cache.
class ObExternalTableFileManager
{
public:
// A cached file list older than this (microseconds) is reloaded on access.
static const int64_t CACHE_EXPIRE_TIME = 20 * 1000000L; //20s
// DELETE_VERSION value of a file that has not been deleted.
static const int64_t MAX_VERSION = INT64_MAX;
// Number of lock buckets used to serialize cache loads.
static const int64_t LOAD_CACHE_LOCK_CNT = 16;
// Timeout passed to each bucket lock attempt (2 * 1000000).
static const int64_t LOCK_TIMEOUT = 2 * 1000000L;
ObExternalTableFileManager() {}
// Initializes the KV cache; must succeed before the cache is used.
int init();
// Accessor for the process-wide instance.
static ObExternalTableFileManager &get_instance();
// Returns the files of `table_id` (partition 0) in `external_files`,
// optionally restricted to file ids covered by `range_filter`.
int get_external_files(
const uint64_t tenant_id,
const uint64_t table_id,
const bool is_local_file_on_disk,
common::ObIAllocator &allocator,
common::ObIArray<ObExternalFileInfo> &external_files,
common::ObIArray<ObNewRange *> *range_filter = NULL);
// Same as get_external_files() for an explicit partition id.
int get_external_files_by_part_id(
const uint64_t tenant_id,
const uint64_t table_id,
const uint64_t partition_id,
const bool is_local_file_on_disk,
common::ObIAllocator &allocator,
common::ObIArray<ObExternalFileInfo> &external_files,
common::ObIArray<ObNewRange *> *range_filter = NULL);
// Erases the cached file list of `table_id` (partition 0) on this server.
int flush_cache(
const uint64_t tenant_id,
const uint64_t table_id);
// Reconciles the inner table with the given file list in its own transaction.
// `file_urls` and `file_sizes` are parallel arrays.
int update_inner_table_file_list(
const uint64_t tenant_id,
const uint64_t table_id,
common::ObIArray<common::ObString> &file_urls,
common::ObIArray<int64_t> &file_sizes);
// Reads all recorded (url, id) pairs of one partition, deleted ones included.
int get_all_records_from_inner_table(ObIAllocator &allocator,
int64_t tenant_id,
int64_t table_id,
int64_t partition_id,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_ids);
// Physically deletes the recorded files of `table_id` (partition 0) inside `trans`.
int clear_inner_table_files(
const uint64_t tenant_id,
const uint64_t table_id,
ObMySQLTransaction &trans);
// Lists the files under `location` and their sizes via the access driver.
int get_external_file_list_on_device(const ObString &location,
ObIArray<ObString> &file_urls,
ObIArray<int64_t> &file_sizes,
const ObString &access_info,
ObIAllocator &allocator);
private:
// Inserts / updates / marks deleted the rows of one partition inside `trans`.
int update_inner_table_files_list_one_part(
const uint64_t tenant_id,
const uint64_t table_id,
const uint64_t partition_id,
ObMySQLTransaction &trans,
common::ObIArray<common::ObString> &file_urls,
common::ObIArray<int64_t> &file_sizes);
// True when the value was created more than CACHE_EXPIRE_TIME ago.
bool is_cache_value_timeout(const ObExternalTableFiles &ext_files) {
return ObTimeUtil::current_time() - ext_files.create_ts_ > CACHE_EXPIRE_TIME;
}
// Loads the visible file list of `key` from the inner table into the cache.
int fill_cache_from_inner_table(
const ObExternalTableFilesKey &key,
const ObExternalTableFiles *&ext_files,
ObKVCacheHandle &handle);
// Takes the exclusive refresh lock on `object_id` within `trans`.
int lock_for_refresh(
ObMySQLTransaction &trans,
const uint64_t tenant_id,
const uint64_t object_id);
private:
// One lock per bucket; a key maps to bucket hash() % LOAD_CACHE_LOCK_CNT.
common::ObSpinLock fill_cache_locks_[LOAD_CACHE_LOCK_CNT];
common::ObKVCache<ObExternalTableFilesKey, ObExternalTableFiles> kv_cache_;
};
}
}
#endif /* _OB_EXTERNAL_TABLE_FILE_MANAGER_H_ */

View File

@ -0,0 +1,153 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL
#include "sql/engine/ob_exec_context.h"
#include "observer/ob_server_struct.h"
#include "storage/tx/ob_trans_service.h"
#include "share/external_table/ob_external_table_file_mgr.h"
#include "share/external_table/ob_external_table_file_rpc_processor.h"
#include "share/external_table/ob_external_table_file_task.h"
namespace oceanbase
{
namespace share
{
// RPC handler: erases the cached file list named by the request on this server.
// The outcome is reported to the caller through result_.rcode_; the handler
// itself always returns OB_SUCCESS so that the response is delivered.
int ObFlushExternalTableKVCacheP::process()
{
  int ret = ObExternalTableFileManager::get_instance().flush_cache(arg_.tenant_id_, arg_.table_id_);
  if (OB_SUCCESS != ret) {
    LOG_WARN("erase kvcache result failed", K(ret));
  }
  result_.rcode_.rcode_ = ret;
  return OB_SUCCESS;
}
// RPC handler: lists the files under the requested location on this server and
// returns their urls and sizes in the response. Url strings placed in the
// response are copied with the response's own allocator. The access info
// passed to the file manager is an empty string.
int ObAsyncLoadExternalTableFileListP::process()
{
  int ret = OB_SUCCESS;
  ObLoadExternalFileListReq &req = arg_;
  ObLoadExternalFileListRes &res = result_;
  ObString &location = req.location_;
  ObArenaAllocator allocator;
  ObString access_info;
  ObSEArray<ObString, 16> file_urls;
  if (OB_FAIL(ObExternalTableFileManager::get_instance().get_external_file_list_on_device(location,
                                                                                         file_urls,
                                                                                         res.file_sizes_,
                                                                                         access_info,
                                                                                         allocator))) {
    LOG_WARN("get external table file on device failed", K(ret));
  } else {
    // The listed urls live in the local arena; re-home them into the response.
    for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); i++) {
      ObString tmp;
      OZ(ob_write_string(res.get_alloc(), file_urls.at(i), tmp));
      OZ(res.file_urls_.push_back(tmp));
    }
  }
  res.rcode_.rcode_ = ret;
  LOG_DEBUG("get external table file", K(ret), K(location), K(file_urls), K(res.file_urls_));
  return ret;
}
void ObRpcAsyncLoadExternalTableFileCallBack::on_timeout()
{
int ret = OB_TIMEOUT;
int64_t current_ts = ObTimeUtility::current_time();
int64_t timeout_ts = get_send_ts() + timeout_;
if (current_ts < timeout_ts) {
LOG_DEBUG("rpc return OB_TIMEOUT before actual timeout, change error code to OB_RPC_CONNECT_ERROR", KR(ret),
K(timeout_ts), K(current_ts));
ret = OB_RPC_CONNECT_ERROR;
}
LOG_WARN("async task timeout", KR(ret));
result_.rcode_.rcode_ = ret;
context_->inc_concurrency_limit_with_signal();
}
// Called when the response of a load-file-list RPC could not be decoded.
// Marks the result as OB_INVALID_ERROR and counts the task as finished.
void ObRpcAsyncLoadExternalTableFileCallBack::on_invalid()
{
// NOTE(review): `ret` is not referenced directly here; presumably the logging
// macros expect a local named ret — confirm before removing it.
int ret = OB_SUCCESS;
// a valid packet on protocol level, but can't decode it.
result_.rcode_.rcode_ = OB_INVALID_ERROR;
LOG_WARN("async task invalid", K(result_.rcode_.rcode_));
context_->inc_concurrency_limit_with_signal();
}
// Completion notification for a load-file-list RPC. If the RPC layer reported
// an error code it is copied into the result; in every case the task is
// counted as finished. Returns the RPC layer's code.
int ObRpcAsyncLoadExternalTableFileCallBack::process()
{
  int ret = OB_SUCCESS;
  LOG_DEBUG("async access callback process", K_(result));
  ret = get_rcode();
  if (OB_SUCCESS != ret) {
    result_.rcode_.rcode_ = get_rcode();
    LOG_WARN("async rpc execution failed", K(get_rcode()), K_(result));
  }
  context_->inc_concurrency_limit_with_signal();
  return ret;
}
// Does not create a copy: returns this very callback object (constness cast
// away), so the object handed to the RPC layer is the one the sender keeps.
// `alloc` is unused.
oceanbase::rpc::frame::ObReqTransport::AsyncCB *ObRpcAsyncLoadExternalTableFileCallBack::clone(
    const oceanbase::rpc::frame::SPAlloc &alloc) const
{
  UNUSED(alloc);
  const rpc::frame::ObReqTransport::AsyncCB *self_as_base = this;
  return const_cast<rpc::frame::ObReqTransport::AsyncCB *>(self_as_base);
}
void ObRpcAsyncFlushExternalTableKVCacheCallBack::on_timeout()
{
int ret = OB_TIMEOUT;
int64_t current_ts = ObTimeUtility::current_time();
int64_t timeout_ts = get_send_ts() + timeout_;
if (current_ts < timeout_ts) {
LOG_DEBUG("rpc return OB_TIMEOUT before actual timeout, change error code to OB_RPC_CONNECT_ERROR", KR(ret),
K(timeout_ts), K(current_ts));
ret = OB_RPC_CONNECT_ERROR;
}
LOG_WARN("async task timeout", KR(ret));
result_.rcode_.rcode_ = ret;
context_->inc_concurrency_limit_with_signal();
}
// Called when the response of a flush-cache RPC could not be decoded.
// Marks the result as OB_INVALID_ERROR and counts the task as finished.
void ObRpcAsyncFlushExternalTableKVCacheCallBack::on_invalid()
{
// NOTE(review): `ret` is not referenced directly here; presumably the logging
// macros expect a local named ret — confirm before removing it.
int ret = OB_SUCCESS;
// a valid packet on protocol level, but can't decode it.
result_.rcode_.rcode_ = OB_INVALID_ERROR;
LOG_WARN("async task invalid", K(result_.rcode_.rcode_));
context_->inc_concurrency_limit_with_signal();
}
// Completion notification for a flush-cache RPC. If the RPC layer reported an
// error code it is copied into the result; in every case the task is counted
// as finished. Returns the RPC layer's code.
int ObRpcAsyncFlushExternalTableKVCacheCallBack::process()
{
  int ret = OB_SUCCESS;
  LOG_DEBUG("async access callback process", K_(result));
  ret = get_rcode();
  if (OB_SUCCESS != ret) {
    // The response body was not decoded on an rpc error, so only the code is
    // meaningful in result_.
    result_.rcode_.rcode_ = get_rcode();
    LOG_WARN("async rpc execution failed", K(get_rcode()), K_(result));
  }
  context_->inc_concurrency_limit_with_signal();
  return ret;
}
// Does not create a copy: returns this very callback object (constness cast
// away), so the object handed to the RPC layer is the one the sender keeps.
// `alloc` is unused.
oceanbase::rpc::frame::ObReqTransport::AsyncCB *ObRpcAsyncFlushExternalTableKVCacheCallBack::clone(
    const oceanbase::rpc::frame::SPAlloc &alloc) const
{
  UNUSED(alloc);
  const rpc::frame::ObReqTransport::AsyncCB *self_as_base = this;
  return const_cast<rpc::frame::ObReqTransport::AsyncCB *>(self_as_base);
}
} // namespace share
} // namespace oceanbase

View File

@ -0,0 +1,173 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OB_EXTERNAL_TABLE_FILE_RPC_PROCESSOR_H_
#define OB_EXTERNAL_TABLE_FILE_RPC_PROCESSOR_H_
#include "rpc/obrpc/ob_rpc_processor.h"
#include "share/external_table/ob_external_table_file_rpc_proxy.h"
#include "share/external_table/ob_external_table_file_task.h"
#include "deps/oblib/src/lib/lock/ob_thread_cond.h"
#include "deps/oblib/src/lib/atomic/ob_atomic.h"
#include "deps/oblib/src/lib/list/ob_obj_store.h"
// Forward declaration of ObGlobalContext.
// NOTE(review): this namespace is opened at global scope, i.e. it declares
// ::observer::ObGlobalContext, not oceanbase::observer::ObGlobalContext —
// confirm whether it was meant to sit inside namespace oceanbase.
namespace observer
{
struct ObGlobalContext;
}
namespace oceanbase
{
namespace share
{
// Rendezvous between a sender that issues `task_cnt_` asynchronous RPCs and the
// callbacks (of type T) that complete them: every callback bumps
// `finished_cnt_`, and the sender blocks in wait_executing_tasks() until the
// counter reaches `task_cnt_`.
// Despite the "concurrency" wording in the method names, the counter they
// operate on is the number of finished tasks.
template<class T>
class ObAsyncRpcTaskWaitContext
{
public:
ObAsyncRpcTaskWaitContext()
: cond_(), finished_cnt_(0), task_cnt_(0), async_cb_list_() {
}
~ObAsyncRpcTaskWaitContext() = default;
// Initializes the condition variable; call before any wait/signal.
int init() { return cond_.init(ObWaitEventIds::ASYNC_EXTERNAL_TABLE_LOCK_WAIT); }
// Marks one task as finished and, once all tasks are accounted for, wakes the
// waiter. The increment happens while the condition guard is held.
void inc_concurrency_limit_with_signal()
{
common::ObThreadCondGuard guard(cond_);
if (__sync_add_and_fetch(&finished_cnt_, 1) >= task_cnt_) {
cond_.signal();
}
}
// Atomic read of the finished-task counter.
int32_t get_current_concurrency() const
{
return ATOMIC_LOAD(&finished_cnt_);
};
// Marks one task as finished without signalling the waiter.
void inc_concurrency_limit()
{
ATOMIC_INC(&finished_cnt_);
}
// Atomically decrements the finished-task counter with a compare-and-swap
// retry loop. Returns OB_SIZE_OVERFLOW instead of going below zero.
int dec_concurrency_limit()
{
int ret = OB_SUCCESS;
int32_t cur = get_current_concurrency();
int32_t next = cur - 1;
if (OB_UNLIKELY(0 == cur)) {
ret = OB_SIZE_OVERFLOW;
} else {
// Retry until the swap is applied to the value we actually observed.
while (ATOMIC_CAS(&finished_cnt_, cur, next) != cur) {
cur = get_current_concurrency();
next = cur - 1;
if (OB_UNLIKELY(0 == cur)) {
ret = OB_SIZE_OVERFLOW;
break;
}
}
}
return ret;
}
// Sets the number of tasks the waiter expects. Plain (non-atomic, unguarded)
// store — NOTE(review): presumably called before any RPC is sent; confirm.
void set_task_count(int32_t task_count) {
task_cnt_ = task_count;
}
typedef common::ObSEArray<T *, 4> AsyncCbList;
// Callback pointers tracked by this context; this class never frees them.
AsyncCbList &get_cb_list() { return async_cb_list_; }
// Blocks until finished_cnt_ >= task_cnt_ or cond_.wait() returns an error,
// which is then returned to the caller.
int wait_executing_tasks() {
int ret = OB_SUCCESS;
common::ObThreadCondGuard guard(cond_);
while (OB_SUCC(ret) && get_current_concurrency() < task_cnt_) {
ret = cond_.wait();
}
return ret;
}
TO_STRING_KV(K_(finished_cnt), K_(task_cnt));
private:
common::ObThreadCond cond_;
// Number of tasks whose callback has fired.
int32_t finished_cnt_;
// Number of tasks the waiter expects to finish.
int32_t task_cnt_;
AsyncCbList async_cb_list_;
};
// Async callback for the OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE RPC. Every
// completion path (process / on_timeout / on_invalid) reports one finished
// task to the shared wait context.
class ObRpcAsyncFlushExternalTableKVCacheCallBack
: public obrpc::ObExtenralTableRpcProxy::AsyncCB<obrpc::OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE>
{
public:
// `context` is stored as a raw pointer and dereferenced by every completion
// path; it is not owned or freed by this class.
ObRpcAsyncFlushExternalTableKVCacheCallBack(
ObAsyncRpcTaskWaitContext<ObRpcAsyncFlushExternalTableKVCacheCallBack> *context)
: context_(context)
{
}
~ObRpcAsyncFlushExternalTableKVCacheCallBack() = default;
void on_timeout() override;
void on_invalid() override;
// The request arguments are not needed by this callback.
void set_args(const Request &arg) { UNUSED(arg); }
// Returns this object itself rather than a copy (see the definition).
oceanbase::rpc::frame::ObReqTransport::AsyncCB *clone(
const oceanbase::rpc::frame::SPAlloc &alloc) const;
virtual int process();
// Response of the RPC; its rcode_ carries the error code on failure paths.
const ObFlushExternalTableFileCacheRes &get_task_resp() const { return result_; }
ObAsyncRpcTaskWaitContext<ObRpcAsyncFlushExternalTableKVCacheCallBack> *get_async_cb_context()
{ return context_; }
TO_STRING_KV(K_(context));
private:
ObAsyncRpcTaskWaitContext<ObRpcAsyncFlushExternalTableKVCacheCallBack> *context_;
};
// Server-side processor of the OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE RPC: erases
// the requested external table file list from the local KV cache.
class ObFlushExternalTableKVCacheP : public
obrpc::ObRpcProcessor<obrpc::ObExtenralTableRpcProxy::ObRpc<obrpc::OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE> >
{
public:
ObFlushExternalTableKVCacheP() {}
~ObFlushExternalTableKVCacheP() {}
// Handles one request; the flush outcome is reported in the response's rcode_.
int process();
private:
DISALLOW_COPY_AND_ASSIGN(ObFlushExternalTableKVCacheP);
};
// Server-side processor of the OB_LOAD_EXTERNAL_FILE_LIST RPC: lists the files
// under the requested location on this server and returns urls and sizes.
class ObAsyncLoadExternalTableFileListP : public
obrpc::ObRpcProcessor<obrpc::ObExtenralTableRpcProxy::ObRpc<obrpc::OB_LOAD_EXTERNAL_FILE_LIST> >
{
public:
ObAsyncLoadExternalTableFileListP() {}
~ObAsyncLoadExternalTableFileListP() {}
// Handles one request; returns the listing error, which is also stored in the
// response's rcode_.
int process();
private:
DISALLOW_COPY_AND_ASSIGN(ObAsyncLoadExternalTableFileListP);
};
// Async callback for the OB_LOAD_EXTERNAL_FILE_LIST RPC. Every completion path
// (process / on_timeout / on_invalid) reports one finished task to the shared
// wait context.
class ObRpcAsyncLoadExternalTableFileCallBack
: public obrpc::ObExtenralTableRpcProxy::AsyncCB<obrpc::OB_LOAD_EXTERNAL_FILE_LIST>
{
public:
// `context` is stored as a raw pointer and dereferenced by every completion
// path; it is not owned or freed by this class.
ObRpcAsyncLoadExternalTableFileCallBack(
ObAsyncRpcTaskWaitContext<ObRpcAsyncLoadExternalTableFileCallBack> *context)
: context_(context)
{
}
~ObRpcAsyncLoadExternalTableFileCallBack() = default;
void on_timeout() override;
void on_invalid() override;
// The request arguments are not needed by this callback.
void set_args(const Request &arg) { UNUSED(arg); }
// Returns this object itself rather than a copy (see the definition).
oceanbase::rpc::frame::ObReqTransport::AsyncCB *clone(
const oceanbase::rpc::frame::SPAlloc &alloc) const;
virtual int process();
// Response of the RPC (file urls and sizes); its rcode_ carries the error code
// on failure paths.
const ObLoadExternalFileListRes &get_task_resp() const { return result_; }
ObAsyncRpcTaskWaitContext<ObRpcAsyncLoadExternalTableFileCallBack> *get_async_cb_context() { return context_; }
TO_STRING_KV(K_(context));
private:
ObAsyncRpcTaskWaitContext<ObRpcAsyncLoadExternalTableFileCallBack> *context_;
};
} // namespace share
} // namespace oceanbase
#endif /* OB_EXTERNAL_TABLE_FILE_RPC_PROCESSOR_H_ */

View File

@ -0,0 +1,36 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OBDEV_SRC_EXTERNAL_TABLE_FILE_RPC_PROXY_H_
#define OBDEV_SRC_EXTERNAL_TABLE_FILE_RPC_PROXY_H_
#include "share/ob_define.h"
#include "rpc/obrpc/ob_rpc_proxy.h"
#include "share/external_table/ob_external_table_file_task.h"
#include "observer/ob_server_struct.h"
namespace oceanbase
{
namespace obrpc
{
// RPC proxy declaring the two external-table rpcs: flushing the external table file cache
// and loading an external file list.
// NOTE: the spellings "Extenral" and "kvcahce" are part of the public identifiers and are
// referenced by other files, so they must not be corrected here in isolation.
class ObExtenralTableRpcProxy : public obrpc::ObRpcProxy
{
public:
DEFINE_TO(ObExtenralTableRpcProxy);
virtual ~ObExtenralTableRpcProxy() {}
// NOTE(review): both rpcs are declared with RPC_AP and are consumed through AsyncCB
// callbacks, so they look asynchronous; the previous comment here ("sync rpc for das task
// result") appears to be a leftover from another proxy -- confirm.
// Each entry lists: priority + method name, packet code, (request type), response type.
RPC_AP(PR5 flush_file_kvcahce, obrpc::OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE, (share::ObFlushExternalTableFileCacheReq), share::ObFlushExternalTableFileCacheRes);
RPC_AP(PR5 load_external_file_list, obrpc::OB_LOAD_EXTERNAL_FILE_LIST, (share::ObLoadExternalFileListReq), share::ObLoadExternalFileListRes);
};
} // namespace obrpc
} // namespace oceanbase
#endif /* OBDEV_SRC_EXTERNAL_TABLE_FILE_RPC_PROXY_H_ */

View File

@ -0,0 +1,70 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL
#include "share/external_table/ob_external_table_file_task.h"
#include "share/external_table/ob_external_table_file_rpc_processor.h"
namespace oceanbase
{
namespace share
{
// Serialization definitions for the plain request/response types: each macro names the
// class followed by the members that take part in (de)serialization.
OB_SERIALIZE_MEMBER(ObFlushExternalTableFileCacheReq, tenant_id_, table_id_, partition_id_);
OB_SERIALIZE_MEMBER(ObFlushExternalTableFileCacheRes, rcode_);
OB_SERIALIZE_MEMBER(ObLoadExternalFileListReq, location_);
// Hand-written serializer for ObLoadExternalFileListRes: encodes rcode_, file_urls_ and
// file_sizes_. allocator_ is not part of the wire format.
OB_DEF_SERIALIZE(ObLoadExternalFileListRes)
{
int ret = OB_SUCCESS;
LST_DO_CODE(OB_UNIS_ENCODE, rcode_, file_urls_, file_sizes_);
return ret;
}
// Serialized length of ObLoadExternalFileListRes; covers the same three members that the
// serializer encodes (rcode_, file_urls_, file_sizes_).
OB_DEF_SERIALIZE_SIZE(ObLoadExternalFileListRes)
{
int64_t len = 0;
LST_DO_CODE(OB_UNIS_ADD_LEN, rcode_, file_urls_, file_sizes_);
return len;
}
// Hand-written deserializer for ObLoadExternalFileListRes. After decoding the three members,
// every url is copied into allocator_ and the array entry is re-pointed at that copy.
// NOTE(review): presumably the decoded ObString values reference the receive buffer, which
// is why they are copied into memory owned by this object -- confirm.
OB_DEF_DESERIALIZE(ObLoadExternalFileListRes)
{
int ret = OB_SUCCESS;
LST_DO_CODE(OB_UNIS_DECODE, rcode_, file_urls_, file_sizes_);
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls_.count(); i++) {
ObString file_url;
// Copy the url text into allocator_.
OZ (ob_write_string(allocator_, file_urls_.at(i), file_url));
// Re-point the stored entry at the copy (runs whether or not the copy above succeeded).
file_urls_.at(i).assign_ptr(file_url.ptr(), file_url.length());
}
return ret;
}
int ObLoadExternalFileListRes::assign(const ObLoadExternalFileListRes &other)
{
int ret = OB_SUCCESS;
rcode_ = other.rcode_;
file_sizes_.assign(other.file_sizes_);
for (int64_t i = 0; OB_SUCC(ret) && i < other.file_urls_.count(); i++) {
ObString tmp;
OZ (ob_write_string(allocator_, other.file_urls_.at(i), tmp));
OZ (file_urls_.push_back(tmp));
}
return ret;
}
} // namespace share
} // namespace oceanbase

View File

@ -0,0 +1,79 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OBDEV_SRC_EXTERNAL_TABLE_FILE_TASK_H_
#define OBDEV_SRC_EXTERNAL_TABLE_FILE_TASK_H_
#include "rpc/obrpc/ob_rpc_result_code.h"
#include "deps/oblib/src/lib/lock/ob_thread_cond.h"
namespace oceanbase
{
namespace share
{
// Request payload of the OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE rpc: identifies the cache entry
// by tenant, table and partition id.
class ObFlushExternalTableFileCacheReq
{
OB_UNIS_VERSION(1);
public:
// All three ids start out as common::OB_INVALID_ID.
// NOTE(review): tenant_id_ is uint64_t while table_id_ and partition_id_ are int64_t, yet
// all are initialized from the same constant -- confirm the conversion is intended.
ObFlushExternalTableFileCacheReq() :
tenant_id_(common::OB_INVALID_ID), table_id_(common::OB_INVALID_ID), partition_id_(common::OB_INVALID_ID) {}
public:
uint64_t tenant_id_;
int64_t table_id_;
int64_t partition_id_;
TO_STRING_KV(K_(tenant_id), K_(table_id), K_(partition_id));
};
// Response payload of the OB_FLUSH_EXTERNAL_TABLE_FILE_CACHE rpc; carries only a result code.
class ObFlushExternalTableFileCacheRes
{
OB_UNIS_VERSION(1);
public:
ObFlushExternalTableFileCacheRes() : rcode_() {}
TO_STRING_KV(K_(rcode));
public:
// Result code reported by the remote side.
obrpc::ObRpcResultCode rcode_;
};
// Request payload of the OB_LOAD_EXTERNAL_FILE_LIST rpc.
class ObLoadExternalFileListReq
{
OB_UNIS_VERSION(1);
public:
ObLoadExternalFileListReq() :
location_() {}
public:
// Location whose file list is requested.
// NOTE(review): ObString does not own its buffer here; presumably the caller keeps the
// underlying memory alive while the request is in use -- confirm.
ObString location_;
TO_STRING_KV(K_(location));
};
// Response payload of the OB_LOAD_EXTERNAL_FILE_LIST rpc: a result code plus two arrays,
// file_urls_ and file_sizes_. The url strings are copied into the private allocator_ by
// both the deserializer and assign().
class ObLoadExternalFileListRes
{
OB_UNIS_VERSION(1);
public:
ObLoadExternalFileListRes() : rcode_(), file_urls_(), file_sizes_(), allocator_() {}
// Exposes the allocator that backs the url strings.
ObIAllocator &get_alloc() { return allocator_; }
// Copies `other` into this object, duplicating the url strings into allocator_.
int assign(const ObLoadExternalFileListRes &other);
// Only the result code is printed; the file arrays are left out of the string form.
TO_STRING_KV(K_(rcode));
public:
obrpc::ObRpcResultCode rcode_; // returned error information
ObSEArray<ObString, 8> file_urls_;
// NOTE(review): presumably file_sizes_[i] is the size of file_urls_[i] -- confirm.
ObSEArray<int64_t, 8> file_sizes_;
private:
ObArenaAllocator allocator_;
};
} // namespace share
} // namespace oceanbase
#endif /* OBDEV_SRC_EXTERNAL_TABLE_FILE_TASK_H_ */

View File

@ -0,0 +1,369 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL
#include "share/external_table/ob_external_table_utils.h"
#include "common/ob_range.h"
#include "common/object/ob_object.h"
#include "share/external_table/ob_external_table_file_mgr.h"
#include "sql/engine/expr/ob_expr_regexp_context.h"
#include "sql/engine/expr/ob_expr_util.h"
#include "sql/engine/ob_exec_context.h"
#include "sql/engine/table/ob_external_table_access_service.h"
#include "sql/ob_sql_utils.h"
#include "sql/rewrite/ob_query_range.h"
namespace oceanbase
{
using namespace common;
using namespace sql;
namespace share
{
// Tells whether `value` stands for the open lower end of a range: the min sentinel in
// either mode, and additionally NULL when not running in Oracle mode.
bool ObExternalTableUtils::is_left_edge(const ObObj &value)
{
  // NULL counts as a left edge only outside Oracle mode.
  const bool null_is_left_edge = !lib::is_oracle_mode();
  return value.is_min_value() || (null_is_left_edge && value.is_null());
}
// Tells whether `value` stands for the open upper end of a range: the max sentinel in
// either mode, and additionally NULL when running in Oracle mode.
bool ObExternalTableUtils::is_right_edge(const ObObj &value)
{
  // NULL counts as a right edge only in Oracle mode.
  const bool null_is_right_edge = lib::is_oracle_mode();
  return value.is_max_value() || (null_is_right_edge && value.is_null());
}
int ObExternalTableUtils::is_file_id_in_ranges(const ObIArray<ObNewRange *> &range_filter,
const int64_t &file_id,
bool &in_ranges)
{
int ret = OB_SUCCESS;
in_ranges = false;
for (int64_t i = 0; OB_SUCC(ret) && !in_ranges && i < range_filter.count(); ++i) {
int64_t start_file_id = ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_FILE_ID;
int64_t end_file_id = INT64_MAX;
if (OB_ISNULL(range_filter.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get Null ptr", K(ret));
} else if (OB_FAIL(resolve_file_id_range(*range_filter.at(i), 0, start_file_id, end_file_id))) {
LOG_WARN("failed to resolve range in external table", K(ret));
} else if (file_id >= start_file_id && file_id <= end_file_id) {
in_ranges = true;
}
}
return ret;
}
// Maps a range boundary object to a concrete integer: 1 for a left edge, INT64_MAX for a
// right edge, otherwise the integer stored in the object.
int64_t ObExternalTableUtils::get_edge_value(const ObObj &edge)
{
  if (is_left_edge(edge)) {
    // file_id and line_number both start at 1
    return 1;
  }
  if (is_right_edge(edge)) {
    return INT64_MAX;
  }
  return edge.get_int();
}
// Extracts the file-id interval [start_file, end_file] described by one rowkey column of
// `range`.
//
// Edge objects are mapped through get_edge_value(): a left edge becomes 1 and a right edge
// becomes INT64_MAX. The border flags of the range are not consulted here.
//
// @param range       range to read the boundary objects from.
// @param column_idx  index of the file-id column inside the start/end rowkeys; it must be
//                    a valid index for both keys.
// @param start_file  [out] lower bound; preset to the minimal file id.
// @param end_file    [out] upper bound; preset to INT64_MAX.
// @return OB_ERR_UNEXPECTED when column_idx is negative or beyond either rowkey.
int ObExternalTableUtils::resolve_file_id_range(const ObNewRange &range,
                                                const int64_t &column_idx,
                                                int64_t &start_file,
                                                int64_t &end_file)
{
  int ret = OB_SUCCESS;
  start_file = ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_FILE_ID;
  end_file = INT64_MAX;
  // Reject negative indexes as well: they would otherwise pass the upper-bound comparison
  // and index in front of the object array.
  if (column_idx < 0 ||
      column_idx >= range.get_start_key().get_obj_cnt() ||
      column_idx >= range.get_end_key().get_obj_cnt()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("failed. input column idx invalid", K(ret), K(range), K(column_idx));
  } else {
    const ObObj &start_obj = range.get_start_key().get_obj_ptr()[column_idx];
    const ObObj &end_obj = range.get_end_key().get_obj_ptr()[column_idx];
    start_file = get_edge_value(start_obj);
    end_file = get_edge_value(end_obj);
  }
  return ret;
}
// Extracts the inclusive line-number interval [start_lineno, end_lineno] described by one
// rowkey column of `range`.
//
// Edge objects are mapped through get_edge_value(). For a concrete (non-edge) boundary the
// border flags are honoured: an exclusive start is moved up by one and an exclusive end is
// moved down by one, so the result is always expressed as a closed interval.
//
// @param range         range to read the boundary objects from.
// @param column_idx    index of the line-number column inside the start/end rowkeys; it
//                      must be a valid index for both keys.
// @param start_lineno  [out] first line; preset to the minimal line number.
// @param end_lineno    [out] last line; preset to INT64_MAX.
// @return OB_ERR_UNEXPECTED when column_idx is negative or beyond either rowkey.
int ObExternalTableUtils::resolve_line_number_range(const ObNewRange &range,
                                                    const int64_t &column_idx,
                                                    int64_t &start_lineno,
                                                    int64_t &end_lineno)
{
  int ret = OB_SUCCESS;
  start_lineno = ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_LINE_NUMBER;
  end_lineno = INT64_MAX;
  // Reject negative indexes as well: they would otherwise pass the upper-bound comparison
  // and index in front of the object array.
  if (column_idx < 0 ||
      column_idx >= range.get_start_key().get_obj_cnt() ||
      column_idx >= range.get_end_key().get_obj_cnt()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("failed. input column idx invalid", K(ret), K(range), K(column_idx));
  } else {
    const ObObj &start_obj = range.get_start_key().get_obj_ptr()[column_idx];
    const ObObj &end_obj = range.get_end_key().get_obj_ptr()[column_idx];
    start_lineno = get_edge_value(start_obj);
    end_lineno = get_edge_value(end_obj);
    // Only concrete boundaries are adjusted; edges already denote the open end.
    if (!is_left_edge(start_obj) && !is_right_edge(start_obj) && !range.border_flag_.inclusive_start()) {
      start_lineno++;
    }
    if (!is_left_edge(end_obj) && !is_right_edge(end_obj) && !range.border_flag_.inclusive_end()) {
      end_lineno--;
    }
  }
  return ret;
}
// Narrows a two-column (file id, line number) query range down to one concrete file and,
// when the file is covered, builds the scan range for it.
//
// file_url / file_id / ref_table_id: identify the file; they are forwarded unchanged to
//   make_external_table_scan_range().
// range: input range; both rowkeys must hold exactly 2 objects, otherwise
//   OB_ERR_UNEXPECTED is returned.
// allocator: used by make_external_table_scan_range() for the rowkey objects.
// new_range: [out] only written when is_valid ends up true.
// is_valid: [out] false when file_id lies outside the range or the resulting line interval
//   is empty; this is not an error and the function still returns OB_SUCCESS.
int ObExternalTableUtils::convert_external_table_new_range(const ObString &file_url,
const int64_t file_id,
const uint64_t ref_table_id,
const ObNewRange &range,
ObIAllocator &allocator,
ObNewRange &new_range,
bool &is_valid)
{
int ret = OB_SUCCESS;
int64_t start_file_id = ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_FILE_ID;
int64_t end_file_id = INT64_MAX;
int64_t start_lineno = ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_LINE_NUMBER;
int64_t end_lineno = INT64_MAX;
ObObj start_obj;
ObObj end_obj;
is_valid = false;
if (OB_UNLIKELY(range.get_start_key().get_obj_cnt() != 2 ||
range.get_end_key().get_obj_cnt() != 2)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed. get unexpected params", K(ret), K(range));
} else if (OB_FAIL(resolve_file_id_range(range, 0, start_file_id, end_file_id))) {
LOG_WARN("failed to resolve range in external table", K(ret));
} else if (file_id >= start_file_id && file_id <= end_file_id) {
// The line-number bound of the range only applies to the boundary files; a file strictly
// inside the file-id interval is scanned from min to max.
if (file_id == start_file_id) {
start_obj = ObObj(range.get_start_key().get_obj_ptr()[1]);
} else {
start_obj.set_min_value();
}
if (file_id == end_file_id) {
end_obj = ObObj(range.get_end_key().get_obj_ptr()[1]);
} else {
end_obj.set_max_value();
}
/* 1. line number is invalid as min:min or max:max.
* 2. start_lineno > end_lineno. eg, (min:1)--> [1, 1) --> [1, 0]
*/
bool start_min = is_left_edge(start_obj);
bool start_max = is_right_edge(start_obj);
bool end_min = is_left_edge(end_obj);
bool end_max = is_right_edge(end_obj);
if (!(start_min && end_min) && !(start_max && end_max)) {
start_lineno = get_edge_value(start_obj);
end_lineno = get_edge_value(end_obj);
// Turn exclusive concrete bounds into inclusive ones; edges are left untouched.
if (!start_min && !start_max && !range.border_flag_.inclusive_start()) {
start_lineno++;
}
if (!end_min && !end_max && !range.border_flag_.inclusive_end()) {
end_lineno--;
}
if (end_lineno >= start_lineno) {
is_valid = true;
}
}
}
// Build the scan range only for a non-empty interval.
if (OB_SUCC(ret) && is_valid) {
if (OB_FAIL(make_external_table_scan_range(file_url,
file_id,
ref_table_id,
start_lineno,
end_lineno,
allocator,
new_range))) {
LOG_WARN("failed to make external table scan range", K(ret));
}
}
return ret;
}
int ObExternalTableUtils::convert_external_table_empty_range(const ObString &file_url,
const int64_t file_id,
const uint64_t ref_table_id,
ObIAllocator &allocator,
ObNewRange &new_range)
{
int ret = OB_SUCCESS;
if (OB_FAIL(make_external_table_scan_range(file_url,
file_id,
ref_table_id,
ObCSVTableRowIterator::MIN_EXTERNAL_TABLE_LINE_NUMBER,
INT64_MAX,
allocator,
new_range))) {
LOG_WARN("failed to make external table scan range", K(ret));
}
return ret;
}
// Fills `new_range` with a closed scan range over the lines [first_lineno, last_lineno] of
// one external file.
//
// Both rowkeys consist of MAX_EXTERNAL_FILE_SCANKEY objects laid out according to
// ExternalTableRangeColumn: PARTITION_ID (holds ref_table_id), FILE_URL, FILE_ID and
// LINE_NUMBER. The start and end keys differ only in LINE_NUMBER.
//
// @return OB_ERR_UNEXPECTED when first_lineno > last_lineno,
//         OB_ALLOCATE_MEMORY_FAILED when a rowkey buffer cannot be allocated.
int ObExternalTableUtils::make_external_table_scan_range(const common::ObString &file_url,
                                                         const int64_t file_id,
                                                         const uint64_t ref_table_id,
                                                         const int64_t first_lineno,
                                                         const int64_t last_lineno,
                                                         common::ObIAllocator &allocator,
                                                         common::ObNewRange &new_range)
{
  int ret = OB_SUCCESS;
  const int64_t key_bytes = sizeof(ObObj) * MAX_EXTERNAL_FILE_SCANKEY;
  ObObj *start_key_objs = NULL;
  ObObj *end_key_objs = NULL;
  if (OB_UNLIKELY(first_lineno > last_lineno)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("failed. get invalid params", K(ret), K(first_lineno), K(last_lineno));
  } else if (OB_ISNULL(start_key_objs = static_cast<ObObj*>(allocator.alloc(key_bytes)))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("fail to alloc memory", K(ret));
  } else if (OB_ISNULL(end_key_objs = static_cast<ObObj*>(allocator.alloc(key_bytes)))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("fail to alloc memory", K(ret));
  } else {
    // The two keys share every column except the line number, so fill them in one pass.
    ObObj *const key_objs[2] = {start_key_objs, end_key_objs};
    const int64_t line_numbers[2] = {first_lineno, last_lineno};
    for (int64_t k = 0; k < 2; ++k) {
      ObObj *key = key_objs[k];
      key[PARTITION_ID] = ObObj();
      key[PARTITION_ID].set_uint64(ref_table_id);
      key[FILE_URL] = ObObj();
      key[FILE_URL].set_varchar(file_url);
      key[FILE_URL].set_collation_type(ObCharset::get_system_collation());
      key[FILE_ID] = ObObj();
      key[FILE_ID].set_int(file_id);
      key[LINE_NUMBER] = ObObj();
      key[LINE_NUMBER].set_int(line_numbers[k]);
    }
    new_range.border_flag_.set_inclusive_start();
    new_range.border_flag_.set_inclusive_end();
    new_range.start_key_.assign(start_key_objs, MAX_EXTERNAL_FILE_SCANKEY);
    new_range.end_key_.assign(end_key_objs, MAX_EXTERNAL_FILE_SCANKEY);
  }
  return ret;
}
int ObExternalTableUtils::prepare_single_scan_range(const uint64_t tenant_id,
const uint64_t table_id,
ObIArray<ObNewRange *> &ranges,
ObIAllocator &range_allocator,
ObIArray<ObNewRange *> &new_range,
bool is_file_on_disk) {
int ret = OB_SUCCESS;
ObSEArray<ObExternalFileInfo, 16> file_urls;
ObSEArray<ObNewRange *, 4> tmp_ranges;
if (OB_FAIL(tmp_ranges.assign(ranges))) {
LOG_WARN("failed to assign array", K(ret));
} else if (OB_FAIL(ObExternalTableFileManager::get_instance().get_external_files(tenant_id,
table_id, is_file_on_disk, range_allocator, file_urls,
tmp_ranges.empty() ? NULL : &tmp_ranges))) {
LOG_WARN("get external table file error", K(ret));
} else {
new_range.reset();
}
for (int64_t i = 0; OB_SUCC(ret) && i < tmp_ranges.count(); ++i) {
if (OB_ISNULL(tmp_ranges.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected NULL ptr", K(ret));
} else {
for (int64_t j = 0; OB_SUCC(ret) && j < file_urls.count(); ++j) {
ObNewRange *range = NULL;
bool is_valid = false;
if (OB_ISNULL(range = OB_NEWx(ObNewRange, (&range_allocator)))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to new a ptr", K(ret));
} else if (OB_FAIL(ObExternalTableUtils::convert_external_table_new_range(
file_urls.at(j).file_url_,
file_urls.at(j).file_id_,
table_id,
*tmp_ranges.at(i),
range_allocator,
*range,
is_valid))) {
LOG_WARN("failed to convert external table new range", K(ret), K(file_urls.at(j)),
K(ranges.at(i)));
} else if (is_valid) {
OZ (new_range.push_back(range));
}
}
}
}
return ret;
}
int ObExternalTableUtils::filter_external_table_files(const ObString &pattern,
ObExecContext &exec_ctx,
ObIArray<ObString> &file_urls)
{
int ret = OB_SUCCESS;
if (!pattern.empty()) {
const common::ObCollationType cs_type_pattern = CS_TYPE_UTF8MB4_BIN;
const common::ObCollationType cs_type_file = CS_TYPE_UTF8MB4_BIN;
const common::ObCollationType cs_type_match = CS_TYPE_UTF16_BIN;
ObExprRegexContext regex_ctx;
ObArenaAllocator allocator;
uint32_t flags = 0;
ObString match_string;
ObSEArray<ObString, 8> tmp_file_urls;
if (OB_FAIL(ObExprRegexContext::get_regexp_flags(match_string, true, flags))) {
LOG_WARN("failed to get regexp flags", K(ret));
} else if (OB_FAIL(regex_ctx.init(exec_ctx.get_allocator(),
exec_ctx.get_my_session(),
pattern,
flags,
true,
cs_type_pattern))) {
LOG_WARN("init regex context failed", K(ret), K(pattern));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); ++i) {
bool match = false;
ObString out_text;
if (OB_FAIL(ObExprUtil::convert_string_collation(file_urls.at(i),
cs_type_file,
out_text,
cs_type_match,
allocator))) {
LOG_WARN("convert charset failed", K(ret));
} else if (OB_FAIL(regex_ctx.match(allocator, out_text, 0, match))) {
LOG_WARN("regex match failed", K(ret));
} else if (match && OB_FAIL(tmp_file_urls.push_back(file_urls.at(i)))) {
LOG_WARN("failed to push back into tmp_file_urls", K(ret));
}
}
if (OB_SUCC(ret) && OB_FAIL(file_urls.assign(tmp_file_urls))) {
LOG_WARN("failed to assign file_urls", K(ret));
}
}
}
return ret;
}
} // namespace share
} // namespace oceanbase

View File

@ -0,0 +1,98 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef _OB_EXTERNAL_TABLE_UTILS_H_
#define _OB_EXTERNAL_TABLE_UTILS_H_
#include "lib/container/ob_iarray.h"
namespace oceanbase
{
namespace common
{
class ObObj;
class ObNewRange;
}
namespace sql
{
class ObDASTabletLoc;
class ObExecContext;
class ObExternalTableAccessService;
class ObQueryRange;
}
namespace share
{
// Static helpers that translate query ranges on an external table's (file id, line number)
// columns into per-file scan ranges, plus a regex filter for file url lists.
class ObExternalTableUtils {
public:
// Column layout of the rowkeys produced by make_external_table_scan_range();
// MAX_EXTERNAL_FILE_SCANKEY is the number of columns.
enum ExternalTableRangeColumn {
PARTITION_ID = 0,
FILE_URL,
FILE_ID,
LINE_NUMBER,
MAX_EXTERNAL_FILE_SCANKEY
};
public:
// range_filter is from query_range
// Sets in_ranges to true when file_id falls inside the file-id interval of any range.
static int is_file_id_in_ranges(const common::ObIArray<common::ObNewRange *> &range_filter,
const int64_t &file_id,
bool &in_ranges);
// Reads the file-id interval from rowkey column column_idx of range; border flags are
// not applied.
static int resolve_file_id_range(const common::ObNewRange &range,
const int64_t &column_idx,
int64_t &start_file,
int64_t &end_file);
// file_id is same in start and end
// Reads the line-number interval from rowkey column column_idx of range and converts
// exclusive concrete bounds into inclusive ones.
static int resolve_line_number_range(const common::ObNewRange &range,
const int64_t &column_idx,
int64_t &start_lineno,
int64_t &end_lineno);
// Restricts a two-column query range to one file; is_valid tells whether new_range was
// produced (false when the file is not covered or the line interval is empty).
static int convert_external_table_new_range(const common::ObString &file_url,
const int64_t file_id,
const uint64_t ref_table_id,
const common::ObNewRange &range,
common::ObIAllocator &allocator,
common::ObNewRange &new_range,
bool &is_valid);
// Builds a scan range covering every line of the given file.
static int convert_external_table_empty_range(const common::ObString &file_url,
const int64_t file_id,
const uint64_t ref_table_id,
common::ObIAllocator &allocator,
common::ObNewRange &new_range);
// Fetches the table's file list and emits one scan range per (query range, file) pair
// that yields a non-empty line interval.
static int prepare_single_scan_range(const uint64_t tenant_id,
const uint64_t table_id,
common::ObIArray<common::ObNewRange *> &ranges,
common::ObIAllocator &range_allocator,
common::ObIArray<common::ObNewRange *> &new_range,
bool is_file_on_disk);
// Keeps only the urls matching the regex pattern; an empty pattern keeps all of them.
static int filter_external_table_files(const common::ObString &pattern,
sql::ObExecContext &exec_ctx,
common::ObIArray<common::ObString> &file_urls);
private:
// Min sentinel, or NULL outside Oracle mode.
static bool is_left_edge(const common::ObObj &value);
// Max sentinel, or NULL in Oracle mode.
static bool is_right_edge(const common::ObObj &value);
// 1 for a left edge, INT64_MAX for a right edge, otherwise the stored integer.
static int64_t get_edge_value(const common::ObObj &edge);
// Allocates both rowkeys from allocator and fills new_range with the closed range
// [first_lineno, last_lineno] of the given file.
static int make_external_table_scan_range(const common::ObString &file_url,
const int64_t file_id,
const uint64_t ref_table_id,
const int64_t first_lineno,
const int64_t last_lineno,
common::ObIAllocator &allocator,
common::ObNewRange &new_range);
};
}
}
#endif /* _OB_EXTERNAL_TABLE_UTILS_H_ */