Files
oceanbase/src/sql/engine/expr/ob_expr_word_segment.cpp
oceanbase-admin cea7de1475 init push
2021-05-31 22:56:52 +08:00

132 lines
5.1 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_ENG
#include "sql/engine/expr/ob_expr_word_segment.h"
#include "lib/word_segment/ob_word_segment.h"
namespace oceanbase {
using namespace common;
namespace sql {
ObExprWordSegment::ObExprWordSegment(ObIAllocator& allocator)
: ObFuncExprOperator(allocator, T_FUN_SYS_WORD_SEGMENT, N_WORD_SEGMENT, MORE_THAN_ZERO, NOT_ROW_DIMENSION)
{
need_charset_convert_ = false;
}
int ObExprWordSegment::calc_result_typeN(
ObExprResType& type, ObExprResType* types, int64_t param_num, ObExprTypeCtx& type_ctx) const
{
UNUSED(type_ctx);
int ret = OB_SUCCESS;
ObLength max_len = 0;
for (int64_t i = 0; /*OB_SUCCESS == ret && */ i < param_num; ++i) {
types[i].set_calc_type(ObVarcharType);
max_len += types[i].get_length();
}
type.set_varchar();
type.set_length(max_len);
ret = aggregate_charsets_for_string_result(type, types, param_num, type_ctx.get_coll_type());
return ret;
}
int ObExprWordSegment::calc_resultN(
ObObj& result, const ObObj* objs_array, int64_t param_num, ObExprCtx& expr_ctx) const
{
int ret = OB_SUCCESS;
ObSEArray<ObString, 32> words;
ObWordSegment ws;
common::hash::ObHashSet<ObObj, common::hash::NoPthreadDefendMode> hashset;
const int64_t BUCKET_SIZE = 64;
if (OB_ISNULL(objs_array) || OB_ISNULL(expr_ctx.calc_buf_) || param_num < 1) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument passed in", K(objs_array), K(expr_ctx.calc_buf_), K(param_num), K(ret));
} else if (OB_FAIL(ws.init(tokenizer_))) {
LOG_WARN("failed to init ObWordSegment", K(ret));
} else if (OB_FAIL(hashset.create(BUCKET_SIZE))) {
LOG_ERROR("failed to create hashset", K(ret));
} else {
ObSEArray<ObObj, 8> tmp_result;
for (int64_t i = 0; OB_SUCC(ret) && i < param_num; ++i) {
tmp_result.reset();
if (objs_array[i].is_null()) {
} else if (!objs_array[i].is_string_type()) {
ret = OB_ERR_INVALID_TYPE_FOR_OP;
LOG_WARN("Unsupported type to word segment", K(objs_array[i].get_type()), K(ret));
} else if (OB_FAIL(ws.segment(objs_array[i], tmp_result))) {
LOG_WARN("failed to segment string in index cell", K(i), K(objs_array[i]), K(ret));
} else {
ObObj word;
for (int64_t j = 0; OB_SUCC(ret) && j < tmp_result.count(); j++) {
word.reset();
int hash_ret = hashset.exist_refactored(tmp_result.at(j));
if (OB_HASH_EXIST == hash_ret) {
// do nothing
} else if (OB_HASH_NOT_EXIST != hash_ret) {
ret = hash_ret;
LOG_WARN("failed to check exist in hashset", K(ret));
} else if (OB_FAIL(ob_write_obj(*expr_ctx.calc_buf_, tmp_result.at(j), word))) {
LOG_WARN("failed to copy string", K(i), K(tmp_result.at(j).get_string()), K(ret));
} else if (OB_FAIL(hashset.set_refactored(word))) {
LOG_WARN("failed to set item", K(word), K(ret));
} else if (OB_FAIL(words.push_back(word.get_string()))) {
LOG_WARN("failed to push back word", K(ret));
} else { /*do nothing*/
}
}
// recover original data and free memory allocated by ws regardless of any error
int tmp_ret = ws.reset();
if (OB_SUCCESS != tmp_ret) {
ret = OB_SUCCESS == ret ? tmp_ret : ret;
LOG_WARN("Fail to reset ws", K(ret));
}
}
}
if (OB_SUCC(ret) && !words.empty()) {
int64_t len = words.count(); // for delimiters
for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) {
len += words.at(i).length();
}
if (OB_SUCC(ret)) {
char* ptr = static_cast<char*>(expr_ctx.calc_buf_->alloc(len));
if (OB_UNLIKELY(NULL == ptr)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate memory", K(expr_ctx.calc_buf_), K(len), K(ret));
} else {
char* cur_ptr = ptr;
for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) {
MEMCPY(cur_ptr, words.at(i).ptr(), words.at(i).length());
cur_ptr += words.at(i).length();
cur_ptr++[0] = ',';
}
if (OB_SUCC(ret)) {
ObString str(len, ptr);
result.set_varchar(str);
if (!result.is_null()) {
result.set_collation(result_type_);
}
}
}
}
}
// must destory hash anyway and free unused memory
hashset.destroy();
for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) {
expr_ctx.calc_buf_->free(words.at(i).ptr());
}
}
return ret;
}
} // namespace sql
} // namespace oceanbase