Files
oceanbase/src/sql/engine/expr/ob_expr_initcap.cpp
2023-05-11 13:03:57 +00:00

461 lines
18 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_ENG
#include "lib/oblog/ob_log.h"
#include "sql/engine/expr/ob_expr_initcap.h"
#include "objit/common/ob_item_type.h"
#include "common/data_buffer.h"
#include "sql/session/ob_sql_session_info.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
ObExprInitcap::ObExprInitcap(ObIAllocator &alloc)
: ObExprInitcapCommon(alloc, T_FUN_SYS_INITCAP, N_INITCAP, 1)
{
}
ObExprInitcap::~ObExprInitcap()
{
}
int ObExprInitcap::calc_result_type1(ObExprResType &type,
ObExprResType &text,
ObExprTypeCtx &type_ctx) const
{
int ret = OB_SUCCESS;
const ObBasicSessionInfo *session = type_ctx.get_session();
ObSEArray<ObExprResType*, 1, ObNullAllocator> params;
CK(OB_NOT_NULL(session));
OZ(params.push_back(&text));
OZ(aggregate_string_type_and_charset_oracle(*session, params, type, PREFER_VAR_LEN_CHAR));
OZ(deduce_string_param_calc_type_and_charset(*session, type, params));
if (OB_SUCC(ret)) {
common::ObLength result_len = text.get_calc_length();
if (OB_UNLIKELY(!ObCharset::is_valid_collation(type.get_collation_type()))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid charset", K(type), K(ret));
} else {
result_len *= ObCharset::get_charset(type.get_collation_type())->caseup_multiply;
type.set_length(result_len);
}
}
return ret;
}
// last_has_first_letter is used to assign has_first_letter at the beginning, and return last state of text
int ObExprInitcapCommon::initcap_string(const ObString &text,
const ObCollationType cs_type,
ObIAllocator *allocator,
ObString &res_str,
bool &last_has_first_letter)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(allocator)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("allocator is null", K(ret));
} else {
int64_t case_multiply = std::max(ObCharset::get_charset(cs_type)->caseup_multiply,
ObCharset::get_charset(cs_type)->casedn_multiply);
int64_t buf_len = case_multiply * text.length();
char *buf = static_cast<char *>(allocator->alloc(buf_len));
int64_t pos = 0;
if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(text.length()), K(case_multiply), K(cs_type), K(ret));
} else {
ObStringScanner scanner(text, cs_type);
ObString cur_letter;
int32_t wchar = 0;
bool has_first_letter = last_has_first_letter;
if (1 == case_multiply) {
MEMCPY(buf, text.ptr(), text.length());
}
while (OB_SUCC(ret)) {
if (OB_ITER_END == (ret = scanner.next_character(cur_letter, wchar))) {
ret = OB_SUCCESS;
break;
} else if (OB_FAIL(ret)) {
LOG_WARN("fail to get next character", K(ret), K(scanner));
} else {
bool is_alphanumeric =
(wchar <= INT8_MAX
&& ob_isalnum(ObCharset::get_charset(CS_TYPE_UTF8MB4_GENERAL_CI), wchar));
if (is_alphanumeric) {
char *src_ptr = (1 == case_multiply) ? buf + pos : cur_letter.ptr();
int64_t buf_remain = cur_letter.length() * case_multiply;
int64_t write_len = has_first_letter ?
ObCharset::casedn(cs_type, src_ptr, cur_letter.length(), buf + pos, buf_remain)
: ObCharset::caseup(cs_type, src_ptr, cur_letter.length(), buf + pos, buf_remain);
if (OB_UNLIKELY(0 == write_len)) {
ret = OB_ERR_UNEXPECTED;
} else {
pos += write_len;
}
has_first_letter = true;
} else {
has_first_letter = false;
MEMCPY(buf + pos, cur_letter.ptr(), cur_letter.length());
pos += cur_letter.length();
}
}
}
if (OB_SUCC(ret)) {
last_has_first_letter = has_first_letter;
res_str.assign_ptr(buf, pos);
}
}
}
return ret;
}
int calc_initcap_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
{
int ret = OB_SUCCESS;
ObDatum *arg_datum = NULL;
ObString res_str;
char *res_buf = NULL;
ObCollationType cs_type = expr.args_[0]->datum_meta_.cs_type_;
int64_t case_multiply = std::max(ObCharset::get_charset(cs_type)->caseup_multiply,
ObCharset::get_charset(cs_type)->casedn_multiply);
if (OB_FAIL(expr.args_[0]->eval(ctx, arg_datum))) {
LOG_WARN("eval arg 0 failed", K(ret), K(expr));
} else if (arg_datum->is_null()) {
res_datum.set_null();
} else {
if (!ob_is_text_tc(expr.args_[0]->datum_meta_.type_)) {
if (OB_ISNULL(res_buf = expr.get_str_res_mem(ctx, arg_datum->len_ * case_multiply))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret));
} else {
ObDataBuffer buf_alloc(res_buf, arg_datum->len_ * case_multiply);
bool last_has_first_letter = false;
if (OB_FAIL(ObExprInitcap::initcap_string(arg_datum->get_string(),
cs_type,
&buf_alloc,
res_str,
last_has_first_letter))) {
LOG_WARN("initcap string failed", K(ret), K(arg_datum->get_string()));
}
}
} else { // text tc
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
ObTextStringIter input_iter(expr.args_[0]->datum_meta_.type_, cs_type,
arg_datum->get_string(),
expr.args_[0]->obj_meta_.has_lob_header());
ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, &res_datum);
int64_t input_byte_len = 0;
int64_t buf_size = 0;
if (OB_FAIL(input_iter.init(0, NULL, &calc_alloc))) {
LOG_WARN("init input_iter failed ", K(ret), K(input_iter));
} else if (OB_FAIL(input_iter.get_byte_len(input_byte_len))) {
LOG_WARN("get input byte len failed");
} else if (OB_FAIL(output_result.init(input_byte_len * case_multiply))) {
LOG_WARN("init stringtext result failed");
} else if (input_byte_len == 0) {
// do nothing, let res_str become empty string
res_str.assign_ptr(NULL, 0);
} else if (OB_FAIL(output_result.get_reserved_buffer(res_buf, buf_size))) {
LOG_WARN("stringtext result reserve buffer failed");
} else {
ObTextStringIterState state;
ObString input_data;
bool last_has_first_letter = false;
while (OB_SUCC(ret)
&& buf_size > 0
&& (state = input_iter.get_next_block(input_data)) == TEXTSTRING_ITER_NEXT) {
ObDataBuffer buf_alloc(res_buf, buf_size);
if (OB_FAIL(ObExprInitcap::initcap_string(input_data,
cs_type,
&buf_alloc,
res_str,
last_has_first_letter))) {
LOG_WARN("initcap string failed", K(ret), K(input_data));
} else if (OB_FAIL(output_result.lseek(res_str.length(), 0))) {
LOG_WARN("result lseek failed", K(ret));
} else {
res_buf = res_buf + res_str.length();
buf_size = buf_size - res_str.length();
}
}
output_result.get_result_buffer(res_str);
}
}
if (OB_SUCC(ret)) {
if (0 == res_str.length()) {
// initcap is only for oracle mode. set res be null when string length is 0.
res_datum.set_null();
} else {
res_datum.set_string(res_str);
}
}
}
return ret;
}
int ObExprInitcap::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
ObExpr &rt_expr) const
{
int ret = OB_SUCCESS;
UNUSED(expr_cg_ctx);
UNUSED(raw_expr);
rt_expr.eval_func_ = calc_initcap_expr;
return ret;
}
ObExprNlsInitCap::ObExprNlsInitCap(ObIAllocator &alloc)
: ObExprInitcapCommon(alloc, T_FUN_SYS_NLS_INITCAP, N_NLS_INITCAP, PARAM_NUM_UNKNOWN)
{
}
int ObExprNlsInitCap::calc_result_typeN(ObExprResType &type,
ObExprResType *texts,
int64_t param_num,
ObExprTypeCtx &type_ctx) const
{
int ret = OB_SUCCESS;
const ObBasicSessionInfo *session = type_ctx.get_session();
if (OB_ISNULL(session)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("session is NULL", K(ret));
} else if (param_num <= 0) {
ret = OB_ERR_NOT_ENOUGH_ARGS_FOR_FUN;
LOG_WARN("nls_initcap require at least one parameter", K(ret), K(param_num));
} else if (param_num > 2) {
ret = OB_ERR_TOO_MANY_ARGS_FOR_FUN;
LOG_WARN("nls_initcap require at most two parameters", K(ret), K(param_num));
} else if (OB_ISNULL(texts)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument", K(ret), K(param_num), K(texts));
} else {
ObSEArray<ObExprResType*, 1, ObNullAllocator> params;
CK(OB_NOT_NULL(session));
if (OB_FAIL(params.push_back(&texts[0]))) {
LOG_WARN("fail to push back param", K(ret));
} else if OB_FAIL(aggregate_string_type_and_charset_oracle(*session, params, type, true)) {
LOG_WARN("fail to aggregrate string type and charset", K(ret));
} else if (ObVarcharType != type.get_type() && ObNVarchar2Type != type.get_type()) {
type.set_varchar();
}
if (OB_SUCC(ret)) {
if (OB_FAIL(deduce_string_param_calc_type_and_charset(*session, type, params))) {
LOG_WARN("fail to deduce string param type", K(ret));
} else {
const ObCharsetInfo * cs_info = NULL;
if (OB_ISNULL(cs_info = ObCharset::get_charset(type.get_collation_type()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get charset info", K(ret), K(texts[0]));
} else {
int64_t len = texts[0].get_calc_length();
len = len * cs_info->mbmaxlen;
type.set_length(len > OB_MAX_ORACLE_VARCHAR_LENGTH ?
OB_MAX_ORACLE_VARCHAR_LENGTH : len);
}
}
}
}
return ret;
}
int ObExprNlsInitCap::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
ObExpr &rt_expr) const
{
int ret = OB_SUCCESS;
UNUSED(expr_cg_ctx);
UNUSED(raw_expr);
rt_expr.eval_func_ = calc_nls_initcap_expr;
if ((1 == rt_expr.arg_cnt_ && rt_expr.args_[0]->is_batch_result())
|| (2 == rt_expr.arg_cnt_
&& rt_expr.args_[0]->is_batch_result()
&& !rt_expr.args_[1]->is_batch_result())) {
rt_expr.eval_batch_func_ = calc_nls_initcap_batch;
}
return ret;
}
int ObExprNlsInitCap::calc_nls_initcap_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
{
int ret = OB_SUCCESS;
ObCollationType cs_type = expr.args_[0]->datum_meta_.cs_type_;
bool is_null_result = false;
int64_t case_multiply = 1;
ObDatum *arg_datum = NULL;
if (OB_UNLIKELY(expr.arg_cnt_ <= 0 || expr.arg_cnt_ > 2)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected param number", K(ret), K(expr.arg_cnt_));
} else if (OB_FAIL(expr.args_[0]->eval(ctx, arg_datum))) {
LOG_WARN("eval arg 0 failed", K(ret), K(expr));
} else if (arg_datum->is_null()) {
res_datum.set_null();
is_null_result = true;
} else if (expr.arg_cnt_ == 2) {
ObDatum *param_datum = NULL;
if (OB_ISNULL(expr.args_[1])) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to get nls parameter", K(ret));
} else if (OB_FAIL(expr.args_[1]->eval(ctx, param_datum))) {
LOG_WARN("eval nls parameter failed", K(ret));
} else if (param_datum->is_null()) {
// Second param_datum is null, set result null as well.
res_datum.set_null();
is_null_result = true;
} else {
const ObString &m_param = param_datum->get_string();
if (OB_UNLIKELY(!ObExprOperator::is_valid_nls_param(m_param))) {
ret = OB_ERR_INVALID_NLS_PARAMETER_STRING;
LOG_WARN("invalid nls parameter", K(ret), K(m_param));
} else {
// Should set cs_type here, but for now, we do nothing
// since nls parameter only support BINARY
}
}
}
if (OB_SUCC(ret) && !is_null_result) {
const ObCharsetInfo * cs_info = NULL;
if (OB_ISNULL(cs_info = ObCharset::get_charset(cs_type))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get charset info", K(ret), K(cs_type));
} else {
case_multiply = std::max(cs_info->caseup_multiply, cs_info->casedn_multiply);
}
}
if (OB_SUCC(ret) && !is_null_result) {
char *res_buf = NULL;
if (OB_ISNULL(res_buf = expr.get_str_res_mem(ctx, arg_datum->len_ * case_multiply))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret));
} else {
ObDataBuffer buf_alloc(res_buf, arg_datum->len_ * case_multiply);
ObString res_str;
bool last_has_first_letter = false;
if (OB_FAIL(initcap_string(arg_datum->get_string(),
expr.args_[0]->datum_meta_.cs_type_,
&buf_alloc, res_str, last_has_first_letter))) {
LOG_WARN("initcap string failed", K(ret), K(arg_datum->get_string()));
} else if (0 == res_str.length()) {
// nls_initcap is only for oracle mode. set res be null when string length is 0.
res_datum.set_null();
} else {
res_datum.set_string(res_str);
}
}
}
return ret;
}
int ObExprNlsInitCap::calc_nls_initcap_batch(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size) {
int ret = OB_SUCCESS;
ObDatum *results = expr.locate_batch_datums(ctx);
if (OB_ISNULL(results)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("expr results frame is not init", K(ret));
} else if (OB_UNLIKELY(expr.arg_cnt_ <= 0 || expr.arg_cnt_ > 2)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected param number", K(ret), K(expr.arg_cnt_));
} else {
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
ObDatum *nlsparam_datum = NULL;
bool is_result_all_null = false;
ObCollationType cs_type = expr.args_[0]->datum_meta_.cs_type_;
int64_t case_multiply = 1;
ObDatum *datum_array = NULL;
if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) {
LOG_WARN("failed to eval batch result args0", K(ret));
} else {
datum_array = expr.args_[0]->locate_batch_datums(ctx);
}
if (2 == expr.arg_cnt_) {
if (OB_FAIL(expr.args_[1]->eval(ctx, nlsparam_datum))) {
LOG_WARN("eval fmt_datum failed", K(ret));
} else if (nlsparam_datum->is_null()) {
is_result_all_null = true;
} else {
const ObString &m_param = nlsparam_datum->get_string();
if (OB_UNLIKELY(!ObExprOperator::is_valid_nls_param(m_param))) {
for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) {
if (skip.at(i) || eval_flags.at(i)) {
continue;
} else if (!datum_array[i].is_null()) {
ret = OB_ERR_INVALID_NLS_PARAMETER_STRING;
LOG_WARN("invalid nls parameter", K(ret), K(m_param));
}
}
if (OB_SUCC(ret)) {
//if the nls_sort param is not valid, return null instead of an error when the param is null
is_result_all_null = true;
}
} else {
// Should set cs_type here, but for now, we do nothing
// since nls parameter only support BINARY=
}
}
}
if (OB_SUCC(ret) && !is_result_all_null) {
const ObCharsetInfo * cs_info = NULL;
if (OB_ISNULL(cs_info = ObCharset::get_charset(cs_type))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get charset info", K(ret), K(cs_type));
} else {
case_multiply = std::max(cs_info->caseup_multiply, cs_info->casedn_multiply);
}
}
if (OB_SUCC(ret)) {
for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) {
char *res_buf = NULL;
if (skip.at(i) || eval_flags.at(i)) {
continue;
} else if (is_result_all_null || datum_array[i].is_null()) {
results[i].set_null();
eval_flags.set(i);
} else if (OB_ISNULL(res_buf = expr.get_str_res_mem(ctx, datum_array[i].len_ * case_multiply, i))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret));
} else {
ObDataBuffer buf_alloc(res_buf, datum_array[i].len_ * case_multiply);
ObString res_str;
bool last_has_first_letter = false;
if (OB_FAIL(initcap_string(datum_array[i].get_string(),
cs_type,
&buf_alloc, res_str, last_has_first_letter))) {
LOG_WARN("initcap string failed", K(ret), K(datum_array[i].get_string()));
} else if (0 == res_str.length()) {
// nls_initcap is only for oracle mode. set res be null when string length is 0.
results[i].set_null();
eval_flags.set(i);
} else {
results[i].set_string(res_str);
eval_flags.set(i);
}
}
}
}
}
return ret;
}
} /* sql */
} /* oceanbase */