add expr to_pinyin()
This commit is contained in:
parent
753f5b99f9
commit
a6eeca581e
1
deps/oblib/src/lib/ob_name_def.h
vendored
1
deps/oblib/src/lib/ob_name_def.h
vendored
@ -1208,4 +1208,5 @@
|
||||
#define N_CALC_ODPS_SIZE "calc_odps_size"
|
||||
#define N_PRIV_ST_GEOHASH "_st_geohash"
|
||||
#define N_PRIV_ST_MAKEPOINT "_st_makepoint"
|
||||
#define N_TO_PINYIN "to_pinyin"
|
||||
#endif //OCEANBASE_LIB_OB_NAME_DEF_H_
|
||||
|
@ -895,6 +895,7 @@ typedef enum ObItemType
|
||||
T_FUNC_SYS_ARRAY_SUM = 1768,
|
||||
T_FUNC_SYS_ARRAY_COMPACT = 1769,
|
||||
T_FUNC_SYS_ARRAY_SORT = 1770,
|
||||
T_FUN_SYS_TO_PINYIN = 1771,
|
||||
///< @note add new oracle only function type before this line
|
||||
|
||||
T_FUN_SYS_TABLET_AUTOINC_NEXTVAL = 1801, // add only for heap table
|
||||
|
@ -422,9 +422,9 @@ int ObLLVMDIHelper::get_current_scope(ObLLVMDIScope &scope)
|
||||
|
||||
ObLLVMDIHelper::ObDIBasicTypeAttr ObLLVMDIHelper::basic_type_[common::ObMaxType] =
|
||||
{
|
||||
{"null", 0, 0, 0},
|
||||
{"tinyint", 8, 8, llvm::dwarf::DW_ATE_signed},
|
||||
{"smallint", 16, 16, llvm::dwarf::DW_ATE_signed},
|
||||
{"null", 0, 0, 0},
|
||||
{"tinyint", 8, 8, llvm::dwarf::DW_ATE_signed},
|
||||
{"smallint", 16, 16, llvm::dwarf::DW_ATE_signed},
|
||||
{"mediumint", 32, 32, llvm::dwarf::DW_ATE_signed},
|
||||
{"int", 32, 32, llvm::dwarf::DW_ATE_signed},
|
||||
{"bigint", 64, 64, llvm::dwarf::DW_ATE_signed},
|
||||
|
@ -823,6 +823,8 @@ ob_set_subtarget(ob_sql engine_expr
|
||||
engine/expr/ob_expr_eval_functions.cpp
|
||||
engine/expr/ob_expr_in.cpp
|
||||
engine/expr/ob_expr_calc_odps_size.cpp
|
||||
engine/expr/ob_expr_to_pinyin_tab.cpp
|
||||
engine/expr/ob_expr_to_pinyin.cpp
|
||||
)
|
||||
|
||||
ob_set_subtarget(ob_sql ALONE
|
||||
|
@ -427,6 +427,7 @@
|
||||
#include "ob_expr_get_mysql_routine_parameter_type_str.h"
|
||||
#include "ob_expr_priv_st_geohash.h"
|
||||
#include "ob_expr_priv_st_makepoint.h"
|
||||
#include "ob_expr_to_pinyin.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -1336,6 +1337,7 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = {
|
||||
NULL, // ObExprArraySort::eval_array_sort, /* 805 */
|
||||
NULL, // ObExprKeyValue::calc_key_value_expr, /* 806 */
|
||||
NULL, // ObExprToChar::eval_to_char, /* 807 */
|
||||
ObExprToPinyin::eval_to_pinyin, /* 808 */
|
||||
};
|
||||
|
||||
static ObExpr::EvalBatchFunc g_expr_eval_batch_functions[] = {
|
||||
@ -1505,6 +1507,7 @@ static ObExpr::EvalBatchFunc g_expr_eval_batch_functions[] = {
|
||||
NULL,// ObExprArraySum::eval_array_sum_batch, /* 163 */
|
||||
NULL,// ObExprArrayCompact::eval_array_compact_batch, /* 164 */
|
||||
NULL,// ObExprArraySort::eval_array_sort_batch, /* 165 */
|
||||
ObExprToPinyin::eval_to_pinyin_batch, /* 166 */
|
||||
};
|
||||
|
||||
static ObExpr::EvalVectorFunc g_expr_eval_vector_functions[] = {
|
||||
|
@ -491,6 +491,8 @@
|
||||
#include "sql/engine/expr/ob_expr_get_mysql_routine_parameter_type_str.h"
|
||||
#include "sql/engine/expr/ob_expr_priv_st_geohash.h"
|
||||
#include "sql/engine/expr/ob_expr_priv_st_makepoint.h"
|
||||
#include "sql/engine/expr/ob_expr_to_pinyin.h"
|
||||
|
||||
|
||||
|
||||
using namespace oceanbase::common;
|
||||
@ -1207,6 +1209,7 @@ void ObExprOperatorFactory::register_expr_operators()
|
||||
REG_OP(ObExprArrayMap);
|
||||
REG_OP(ObExprGetMySQLRoutineParameterTypeStr);
|
||||
REG_OP(ObExprCalcOdpsSize);
|
||||
REG_OP(ObExprToPinyin);
|
||||
}();
|
||||
// 注册oracle系统函数
|
||||
REG_OP_ORCL(ObExprSysConnectByPath);
|
||||
@ -1394,6 +1397,7 @@ void ObExprOperatorFactory::register_expr_operators()
|
||||
REG_OP_ORCL(ObExprFromTz);
|
||||
REG_OP_ORCL(ObExprSpatialCellid);
|
||||
REG_OP_ORCL(ObExprSpatialMbr);
|
||||
REG_OP_ORCL(ObExprToPinyin);
|
||||
//label security
|
||||
REG_OP_ORCL(ObExprOLSPolicyCreate);
|
||||
REG_OP_ORCL(ObExprOLSPolicyAlter);
|
||||
|
255
src/sql/engine/expr/ob_expr_to_pinyin.cpp
Normal file
255
src/sql/engine/expr/ob_expr_to_pinyin.cpp
Normal file
@ -0,0 +1,255 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#define USING_LOG_PREFIX SQL_ENG
|
||||
#include "sql/engine/expr/ob_expr_to_pinyin.h"
|
||||
#include "objit/common/ob_item_type.h"
|
||||
#include "sql/session/ob_sql_session_info.h"
|
||||
#include "sql/engine/ob_exec_context.h"
|
||||
#include "lib/charset/ob_charset_string_helper.h"
|
||||
|
||||
using namespace oceanbase::common;
|
||||
using namespace oceanbase::sql;
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace sql
|
||||
{
|
||||
#include "sql/engine/expr/ob_expr_to_pinyin_tab.h"
|
||||
|
||||
|
||||
// Constructs the to_pinyin operator: item type T_FUN_SYS_TO_PINYIN, name
// N_TO_PINYIN, exactly one argument.
ObExprToPinyin::ObExprToPinyin(ObIAllocator &alloc)
    : ObFuncExprOperator(alloc,
                         T_FUN_SYS_TO_PINYIN,
                         N_TO_PINYIN,
                         1,
                         VALID_FOR_GENERATED_COL,
                         NOT_ROW_DIMENSION)
{}
|
||||
|
||||
// Nothing to release: the operator holds no resources of its own.
ObExprToPinyin::~ObExprToPinyin() {}
|
||||
|
||||
/**
 * Deduces the result type of to_pinyin(arg).
 *
 * The argument is requested as a varchar in CS_TYPE_UTF8MB4_ZH_0900_AS_CS.
 * The result is a varchar with coercible collation level whose collation is
 * taken from the session: the NLS collation in Oracle mode, the connection
 * collation otherwise.
 *
 * @param type      [out] result type
 * @param type1     [in,out] type of the single argument; its calc type is set
 * @param type_ctx  type deduction context that supplies the session
 * @return OB_SUCCESS, or OB_ERR_UNEXPECTED when the context has no session
 */
int ObExprToPinyin::calc_result_type1(ObExprResType &type,
                                      ObExprResType &type1,
                                      common::ObExprTypeCtx &type_ctx) const
{
  int ret = OB_SUCCESS;

  // Argument side: evaluate the input as a varchar in the Chinese collation.
  type1.set_calc_type(ObVarcharType);
  type1.set_calc_collation_type(CS_TYPE_UTF8MB4_ZH_0900_AS_CS);

  // Result side: varchar, coercible.
  type.set_varchar();
  type.set_collation_level(common::CS_LEVEL_COERCIBLE);

  const sql::ObSQLSessionInfo *session = type_ctx.get_session();
  if (NULL != session) {
    if (lib::is_oracle_mode()) {
      type.set_collation_type(session->get_nls_collation());
    } else {
      type.set_collation_type(session->get_local_collation_connection());
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("session is null",K(ret));
  }
  return ret;
}
|
||||
|
||||
/**
 * Computes a 16-bit lookup key for `input` from its collation sort key under
 * CS_TYPE_UTF8MB4_ZH_0900_AS_CS.
 *
 * The key is the first two bytes of the strnxfrm() output combined as
 * byte0 * 256 + byte1. The bytes are read individually, so the result does not
 * depend on host endianness or buffer alignment, and no byte beyond what
 * strnxfrm() produced is ever read.
 *
 * @param alloc  allocator used for the temporary sort-key buffer (freed before return)
 * @param input  the text to weigh; callers in this file pass a single character
 * @return the 16-bit key, or uint64_t(-1) if the charset is unavailable, the
 *         allocation fails, or fewer than two sort-key bytes were produced
 */
uint64_t convert_to_sortkey(ObIAllocator &alloc, ObString input) {
  int ret = OB_SUCCESS;
  uint64_t sortkey = static_cast<uint64_t>(-1);
  const ObCharsetInfo *cs = ObCharset::get_charset(CS_TYPE_UTF8MB4_ZH_0900_AS_CS);

  if (OB_ISNULL(cs) || OB_ISNULL(cs->coll)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("charset info is null", K(ret));
  } else {
    const size_t buf_len = cs->coll->strnxfrmlen(cs, cs->mbmaxlen * input.length());
    bool is_valid_unicode_tmp = true;
    char *buf = NULL;
    if (buf_len < 2) {
      // Too small to hold the two bytes we need; keep the failure sentinel.
    } else if (OB_ISNULL(buf = static_cast<char*>(alloc.alloc(buf_len)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("fail to alloc buf", K(ret), K(buf_len), K(input));
    } else {
      const size_t result_len = cs->coll->strnxfrm(cs,
                                                   reinterpret_cast<uchar *>(buf),
                                                   buf_len,
                                                   buf_len,
                                                   reinterpret_cast<const uchar *>(input.ptr()),
                                                   input.length(),
                                                   0,
                                                   &is_valid_unicode_tmp);
      if (result_len >= 2) {
        const uchar *key = reinterpret_cast<const uchar *>(buf);
        sortkey = (static_cast<uint64_t>(key[0]) << 8) | static_cast<uint64_t>(key[1]);
      }
      alloc.free(buf);
    }
  }
  return sortkey;
}
|
||||
|
||||
bool compare_end(const PinyinPair& a, const PinyinPair& b) {
|
||||
return a.end < b.end;
|
||||
}
|
||||
|
||||
/**
 * Maps one character to its pinyin spelling.
 *
 * The character's sort key (see convert_to_sortkey) is looked up in
 * PINYIN_TABLE with a binary search on the `end` field; an entry matches when
 * the key lies inside its [begin, end] range.
 *
 * @param alloc  allocator for the temporary sort-key buffer
 * @param input  the character to convert
 * @return the table entry's pinyin when a range matches; otherwise `input`
 *         itself. The returned string is not a copy: it refers either to the
 *         table entry's text or to the caller's `input` bytes.
 */
ObString convert_word_to_pinyin(ObIAllocator &alloc, ObString input) {
  const uint64_t input_sortkey = convert_to_sortkey(alloc, input);
  ObString result;
  // Translate the sort key to pinyin: binary-search for the first entry whose
  // `end` is not below the key, then confirm the key is inside that range.
  PinyinPair target = {0, input_sortkey, ""};
  PinyinPair *it = std::lower_bound(PINYIN_TABLE, PINYIN_TABLE + PINYIN_COUNT, target, compare_end);
  if (it != PINYIN_TABLE + PINYIN_COUNT &&
      input_sortkey >= it->begin && input_sortkey <= it->end) {
    result = it->pinyin;
  } else {
    // No range covers this key: pass the character through unchanged.
    result = input;
  }
  return result;
}
|
||||
|
||||
int ObExprToPinyin::eval_to_pinyin(const ObExpr &expr, ObEvalCtx &ctx,
|
||||
ObDatum &expr_datum)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
ObDatum *input = NULL;
|
||||
ObString input_str;
|
||||
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
|
||||
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
|
||||
ObIAllocator &res_alloc = ctx.get_expr_res_alloc();
|
||||
const sql::ObSQLSessionInfo *session = ctx.exec_ctx_.get_my_session();
|
||||
|
||||
if (OB_FAIL(expr.args_[0]->eval(ctx, input))) {
|
||||
LOG_WARN("fail to eval", K(ret), KPC(expr.args_[0]));
|
||||
} else if (input->is_null()) {
|
||||
expr_datum.set_null();
|
||||
return ret;
|
||||
} else {
|
||||
input_str = input->get_string();
|
||||
}
|
||||
|
||||
const ObCharsetInfo *cs = ObCharset::get_charset(CS_TYPE_UTF8MB4_ZH_0900_AS_CS);
|
||||
size_t buf_len = cs->mbmaxlen*input_str.length();
|
||||
char *buf = NULL;
|
||||
if (OB_ISNULL(buf = static_cast<char*>(calc_alloc.alloc(buf_len)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fail to alloc buf", K(ret), K(buf_len), K(input_str));
|
||||
} else {
|
||||
struct Functor {
|
||||
Functor(char *buf, int64_t &off, ObIAllocator &alloc) : buf(buf), off(off), calc_alloc(alloc) {}
|
||||
char *buf;
|
||||
int64_t &off;
|
||||
ObIAllocator &calc_alloc;
|
||||
|
||||
int operator() (const ObString &str, ob_wc_t wchar) {
|
||||
int ret = OB_SUCCESS;
|
||||
ObString pinyin = convert_word_to_pinyin(calc_alloc, str);
|
||||
if(!pinyin.empty()) {
|
||||
MEMCPY(buf + off, pinyin.ptr(), pinyin.length());
|
||||
off += pinyin.length();
|
||||
} else {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
int64_t off = 0;
|
||||
Functor temp_handler(buf, off, calc_alloc);
|
||||
ObCharsetType charset_type = ObCharset::charset_type_by_coll(CS_TYPE_UTF8MB4_ZH_0900_AS_CS);
|
||||
ObFastStringScanner::foreach_char(input_str, charset_type, temp_handler);
|
||||
ObString converted_result;
|
||||
OZ(ObExprUtil::convert_string_collation(ObString(off, buf),
|
||||
CS_TYPE_UTF8MB4_ZH_0900_AS_CS,
|
||||
converted_result,
|
||||
lib::is_oracle_mode() ? session->get_nls_collation() : session->get_local_collation_connection(),
|
||||
res_alloc));
|
||||
expr_datum.set_string(converted_result);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExprToPinyin::eval_to_pinyin_batch(
|
||||
const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size)
|
||||
{
|
||||
LOG_DEBUG("eval to_pinyin in batch mode", K(batch_size));
|
||||
int ret = OB_SUCCESS;
|
||||
ObDatum *results = expr.locate_batch_datums(ctx);
|
||||
const sql::ObSQLSessionInfo *session = ctx.exec_ctx_.get_my_session();
|
||||
|
||||
if (OB_ISNULL(results)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("expr results frame is not init", K(ret));
|
||||
} else {
|
||||
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
|
||||
if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) {
|
||||
LOG_WARN("failed to eval batch result input", K(ret));
|
||||
} else {
|
||||
ObDatum *datum_array = expr.args_[0]->locate_batch_datums(ctx);
|
||||
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
|
||||
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
|
||||
ObIAllocator &res_alloc = ctx.get_expr_res_alloc();
|
||||
const ObCharsetInfo *cs = ObCharset::get_charset(CS_TYPE_UTF8MB4_ZH_0900_AS_CS);
|
||||
|
||||
for(int64_t j = 0; OB_SUCC(ret) && j < batch_size; ++j) {
|
||||
if (skip.at(j) || eval_flags.at(j)) {
|
||||
continue;
|
||||
} else if (datum_array[j].is_null()) {
|
||||
results[j].set_null();
|
||||
eval_flags.set(j);
|
||||
} else {
|
||||
ObString input_str = datum_array[j].get_string();
|
||||
int64_t off = 0;
|
||||
char *buf = NULL;
|
||||
size_t buf_len = cs->mbmaxlen*input_str.length();
|
||||
if (OB_ISNULL(buf = static_cast<char *>(calc_alloc.alloc(buf_len)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("fail to alloc buf", K(ret), K(buf_len), K(input_str));
|
||||
} else {
|
||||
struct Functor {
|
||||
Functor(char *buf, int64_t &off, ObIAllocator &alloc) : buf(buf), off(off), calc_alloc(alloc) {}
|
||||
char *buf;
|
||||
int64_t &off;
|
||||
ObIAllocator &calc_alloc;
|
||||
int operator() (const ObString &str, ob_wc_t wchar) {
|
||||
int ret = OB_SUCCESS;
|
||||
ObString pinyin = convert_word_to_pinyin(calc_alloc, str);
|
||||
if(!pinyin.empty()) {
|
||||
MEMCPY(buf + off, pinyin.ptr(), pinyin.length());
|
||||
off += pinyin.length();
|
||||
} else {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
int64_t off = 0;
|
||||
Functor temp_handler(buf, off, calc_alloc);
|
||||
ObCharsetType charset_type = ObCharset::charset_type_by_coll(CS_TYPE_UTF8MB4_ZH_0900_AS_CS);
|
||||
ObFastStringScanner::foreach_char(input_str, charset_type, temp_handler);
|
||||
ObString converted_result;
|
||||
OZ(ObExprUtil::convert_string_collation(ObString(off, buf),
|
||||
CS_TYPE_UTF8MB4_ZH_0900_AS_CS,
|
||||
converted_result,
|
||||
lib::is_oracle_mode() ? session->get_nls_collation() : session->get_local_collation_connection(),
|
||||
res_alloc));
|
||||
results[j].set_string(converted_result);
|
||||
eval_flags.set(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Code generation hook: binds the runtime evaluation functions.
 *
 * The row-wise function is always installed; the batch function is installed
 * only when the first argument produces batch results.
 *
 * @return always OB_SUCCESS
 */
int ObExprToPinyin::cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr,
                            ObExpr &rt_expr) const
{
  UNUSED(op_cg_ctx);
  const bool arg_is_batch = rt_expr.args_[0]->is_batch_result();
  rt_expr.eval_func_ = ObExprToPinyin::eval_to_pinyin;
  if (arg_is_batch) {
    rt_expr.eval_batch_func_ = ObExprToPinyin::eval_to_pinyin_batch;
  }
  return OB_SUCCESS;
}
|
||||
|
||||
} //namespace sql
|
||||
} //namespace oceanbase
|
46
src/sql/engine/expr/ob_expr_to_pinyin.h
Normal file
46
src/sql/engine/expr/ob_expr_to_pinyin.h
Normal file
@ -0,0 +1,46 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_TO_PINYIN_
|
||||
#define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_TO_PINYIN_
|
||||
|
||||
#include "sql/engine/expr/ob_expr_operator.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace sql
|
||||
{
|
||||
class ObExprToPinyin : public ObFuncExprOperator
|
||||
{
|
||||
public:
|
||||
explicit ObExprToPinyin(common::ObIAllocator &alloc);
|
||||
virtual ~ObExprToPinyin();
|
||||
|
||||
virtual int calc_result_type1(ObExprResType &type,
|
||||
ObExprResType &type1,
|
||||
common::ObExprTypeCtx &type_ctx) const;
|
||||
|
||||
static int eval_to_pinyin(const ObExpr &expr,
|
||||
ObEvalCtx &ctx,
|
||||
ObDatum &expr_datum);
|
||||
static int eval_to_pinyin_batch(
|
||||
const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size);
|
||||
virtual int cg_expr(ObExprCGCtx &op_cg_ctx,
|
||||
const ObRawExpr &raw_expr,
|
||||
ObExpr &rt_expr) const override;
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ObExprToPinyin);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif /* OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_TO_PINYIN_ */
|
1517
src/sql/engine/expr/ob_expr_to_pinyin_tab.cpp
Normal file
1517
src/sql/engine/expr/ob_expr_to_pinyin_tab.cpp
Normal file
File diff suppressed because it is too large
Load Diff
23
src/sql/engine/expr/ob_expr_to_pinyin_tab.h
Normal file
23
src/sql/engine/expr/ob_expr_to_pinyin_tab.h
Normal file
@ -0,0 +1,23 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#ifndef OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_TO_PINYIN_TABLE_
|
||||
#define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_TO_PINYIN_TABLE_
|
||||
|
||||
// The content of `begin` and `end` in `PINYIN_TABLE` comes from the file `cldr-common-33.0.zip:common/collation/zh.xml:35~1558`;
|
||||
# define PINYIN_COUNT 1502
|
||||
struct PinyinPair{
|
||||
uint64_t begin;
|
||||
uint64_t end;
|
||||
ObString pinyin;
|
||||
};
|
||||
extern PinyinPair PINYIN_TABLE[PINYIN_COUNT];
|
||||
#endif /* OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_TO_PINYIN_TABLE_ */
|
Loading…
x
Reference in New Issue
Block a user