Fix charset handling bug in the REPLACE expression
This commit is contained in:
		
							
								
								
									
										20
									
								
								deps/oblib/src/lib/charset/ob_charset.cpp
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										20
									
								
								deps/oblib/src/lib/charset/ob_charset.cpp
									
									
									
									
										vendored
									
									
								
							@ -3141,10 +3141,10 @@ int ObCharsetUtils::init(ObIAllocator &allocator)
 | 
				
			|||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool ObStringScanner::next_character(ObString &encoding, int32_t &wchar, int &ret)
 | 
					bool ObStringScanner::next_character(ObString &encoding_value, int32_t &unicode_value, int &ret)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  bool has_next = false;
 | 
					  bool has_next = false;
 | 
				
			||||||
  ret = next_character(encoding, wchar);
 | 
					  ret = next_character(encoding_value, unicode_value);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (OB_ITER_END == ret) {
 | 
					  if (OB_ITER_END == ret) {
 | 
				
			||||||
    has_next = false;
 | 
					    has_next = false;
 | 
				
			||||||
@ -3158,20 +3158,26 @@ bool ObStringScanner::next_character(ObString &encoding, int32_t &wchar, int &re
 | 
				
			|||||||
  return has_next;
 | 
					  return has_next;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int ObStringScanner::next_character(ObString &encoding, int32_t &wchar)
 | 
					int ObStringScanner::next_character(ObString &encoding_value, int32_t &unicode_value)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int ret = OB_SUCCESS;
 | 
					  int ret = OB_SUCCESS;
 | 
				
			||||||
  int32_t length = 0;
 | 
					  int32_t length = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ObString &str = const_cast<ObString &>(str_);
 | 
					  ObString &str = str_;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (str.empty()) {
 | 
					  if (str.empty()) {
 | 
				
			||||||
    ret = OB_ITER_END;
 | 
					    ret = OB_ITER_END;
 | 
				
			||||||
  } else if (OB_FAIL(ObCharset::mb_wc(collation_type_, str.ptr(), str.length(), length, wchar))) {
 | 
					  } else if (OB_FAIL(ObCharset::mb_wc(collation_type_, str.ptr(), str.length(), length, unicode_value))) {
 | 
				
			||||||
 | 
					    if (!!(IGNORE_INVALID_CHARACTER & flags_)) {
 | 
				
			||||||
 | 
					      ret = OB_SUCCESS;
 | 
				
			||||||
 | 
					      length = 1;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
      ret = OB_ERR_INCORRECT_STRING_VALUE;
 | 
					      ret = OB_ERR_INCORRECT_STRING_VALUE;
 | 
				
			||||||
      LOG_WARN("fail to call mb_wc", K(ret), KPHEX(str.ptr(), str.length()));
 | 
					      LOG_WARN("fail to call mb_wc", K(ret), KPHEX(str.ptr(), str.length()));
 | 
				
			||||||
  } else {
 | 
					    }
 | 
				
			||||||
    encoding.assign_ptr(str.ptr(), length);
 | 
					  }
 | 
				
			||||||
 | 
					  if (OB_SUCC(ret)) {
 | 
				
			||||||
 | 
					    encoding_value.assign_ptr(str.ptr(), length);
 | 
				
			||||||
    LOG_DEBUG("next_character", K(ret), KPHEX(str.ptr(), str.length()));
 | 
					    LOG_DEBUG("next_character", K(ret), KPHEX(str.ptr(), str.length()));
 | 
				
			||||||
    str += length;
 | 
					    str += length;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										17
									
								
								deps/oblib/src/lib/charset/ob_charset.h
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										17
									
								
								deps/oblib/src/lib/charset/ob_charset.h
									
									
									
									
										vendored
									
									
								
							@ -608,15 +608,22 @@ private:
 | 
				
			|||||||
class ObStringScanner
 | 
					class ObStringScanner
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  ObStringScanner(const ObString &str, common::ObCollationType collation_type)
 | 
					  enum {
 | 
				
			||||||
    : str_(str), collation_type_(collation_type)
 | 
					    IGNORE_INVALID_CHARACTER = 1<<0,
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  ObStringScanner(const ObString &str, common::ObCollationType collation_type, uint64_t flags = 0)
 | 
				
			||||||
 | 
					    : origin_str_(str), str_(str), collation_type_(collation_type), flags_(flags)
 | 
				
			||||||
  {}
 | 
					  {}
 | 
				
			||||||
  int next_character(ObString &encoding, int32_t &wchar);
 | 
					  int next_character(ObString &encoding_value, int32_t &unicode_value);
 | 
				
			||||||
  bool next_character(ObString &encoding, int32_t &wchar, int &ret);
 | 
					  bool next_character(ObString &encoding_value, int32_t &unicode_value, int &ret);
 | 
				
			||||||
 | 
					  ObString get_remain_str() { return str_; }
 | 
				
			||||||
 | 
					  void forward_bytes(int64_t n) { str_ += n; }
 | 
				
			||||||
  TO_STRING_KV(K_(str), K_(collation_type));
 | 
					  TO_STRING_KV(K_(str), K_(collation_type));
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
  const ObString &str_;
 | 
					  const ObString &origin_str_;
 | 
				
			||||||
 | 
					  ObString str_;
 | 
				
			||||||
  common::ObCollationType collation_type_;
 | 
					  common::ObCollationType collation_type_;
 | 
				
			||||||
 | 
					  uint64_t flags_;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ObCharSetString
 | 
					class ObCharSetString
 | 
				
			||||||
 | 
				
			|||||||
@ -131,9 +131,10 @@ int ObExprReplace::replace(ObString &ret_str,
 | 
				
			|||||||
                           ObExprStringBuf &string_buf)
 | 
					                           ObExprStringBuf &string_buf)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  int ret = OB_SUCCESS;
 | 
					  int ret = OB_SUCCESS;
 | 
				
			||||||
 | 
					  ObString dst_str;
 | 
				
			||||||
 | 
					  bool is_null = false;
 | 
				
			||||||
  if (OB_UNLIKELY(text.length() <= 0)) {
 | 
					  if (OB_UNLIKELY(text.length() <= 0)) {
 | 
				
			||||||
    // Return empty string
 | 
					    // Return empty string
 | 
				
			||||||
    ret_str.reset();
 | 
					 | 
				
			||||||
  } else if (OB_UNLIKELY(from.length() <= 0) || OB_UNLIKELY(to.length() < 0)) {
 | 
					  } else if (OB_UNLIKELY(from.length() <= 0) || OB_UNLIKELY(to.length() < 0)) {
 | 
				
			||||||
    ret_str = text;
 | 
					    ret_str = text;
 | 
				
			||||||
  } else if (OB_ISNULL(from.ptr())) {
 | 
					  } else if (OB_ISNULL(from.ptr())) {
 | 
				
			||||||
@ -142,51 +143,29 @@ int ObExprReplace::replace(ObString &ret_str,
 | 
				
			|||||||
  } else if (OB_UNLIKELY(text.length() < from.length()) ||
 | 
					  } else if (OB_UNLIKELY(text.length() < from.length()) ||
 | 
				
			||||||
             OB_UNLIKELY(from == to)) {
 | 
					             OB_UNLIKELY(from == to)) {
 | 
				
			||||||
    ret_str = text;
 | 
					    ret_str = text;
 | 
				
			||||||
 | 
					  } else if (OB_FAIL(ObSQLUtils::check_well_formed_str(text, cs_type, dst_str, is_null, false, false))
 | 
				
			||||||
 | 
					            || OB_FAIL(ObSQLUtils::check_well_formed_str(from, cs_type, dst_str, is_null, false, false))
 | 
				
			||||||
 | 
					            || OB_FAIL(ObSQLUtils::check_well_formed_str(to, cs_type, dst_str, is_null, false, false))) {
 | 
				
			||||||
 | 
					    LOG_WARN("check well formed str failed", K(ret));
 | 
				
			||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    ObSEArray<uint32_t, 4> locations(common::ObModIds::OB_SQL_EXPR_REPLACE,
 | 
					    ObSEArray<uint32_t, 4> locations(common::ObModIds::OB_SQL_EXPR_REPLACE,
 | 
				
			||||||
                                     common::OB_MALLOC_NORMAL_BLOCK_SIZE);
 | 
					                                     common::OB_MALLOC_NORMAL_BLOCK_SIZE);
 | 
				
			||||||
    const char *buf_start = text.ptr();
 | 
					    ObString mb;
 | 
				
			||||||
    const char *buf_end = text.ptr() + text.length();
 | 
					    int32_t wc;
 | 
				
			||||||
    const ObCharsetInfo *cs = NULL;
 | 
					    ObStringScanner scanner(text, cs_type, ObStringScanner::IGNORE_INVALID_CHARACTER);
 | 
				
			||||||
    int error = 0;
 | 
					    while (OB_SUCC(ret) && scanner.get_remain_str().length() >= from.length()) {
 | 
				
			||||||
    if (OB_UNLIKELY(OB_ISNULL(cs = ObCharset::get_charset(cs_type)) ||
 | 
					      if (0 == MEMCMP(scanner.get_remain_str().ptr(), from.ptr(), from.length())) {
 | 
				
			||||||
            OB_ISNULL(cs->cset))) {
 | 
					        ret = locations.push_back(scanner.get_remain_str().ptr() - text.ptr());
 | 
				
			||||||
      ret = OB_ERR_UNEXPECTED;
 | 
					        scanner.forward_bytes(from.length());
 | 
				
			||||||
      LOG_ERROR("unexpected error. invalid argument(s)", K(cs_type));
 | 
					      } else if (OB_FAIL(scanner.next_character(mb, wc))) {
 | 
				
			||||||
    }
 | 
					        LOG_WARN("get next character failed", K(ret));
 | 
				
			||||||
    int32_t char_len = 0;
 | 
					 | 
				
			||||||
    int32_t next_char_len = 0;
 | 
					 | 
				
			||||||
    while (OB_SUCC(ret) && OB_LIKELY(error == 0) && buf_start + char_len < buf_end) {
 | 
					 | 
				
			||||||
      char_len += static_cast<int32_t>(cs->cset->well_formed_len(cs, buf_start + char_len, buf_end, 1, &error));
 | 
					 | 
				
			||||||
      if (OB_UNLIKELY(0 != error)) {
 | 
					 | 
				
			||||||
        bool is_null = false;
 | 
					 | 
				
			||||||
        //mysql strict mode will return null, otherwise will return something
 | 
					 | 
				
			||||||
        //so we should get session to acquire if is_strict mode here.
 | 
					 | 
				
			||||||
        //we now set is_strict=false.
 | 
					 | 
				
			||||||
        if (OB_FAIL(ObSQLUtils::check_well_formed_str(text, cs_type, ret_str, is_null, false, false))) {
 | 
					 | 
				
			||||||
          LOG_WARN("check well formed str failed", K(ret));
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
      } else if (next_char_len == 0 && FALSE_IT(next_char_len = char_len)) {
 | 
					 | 
				
			||||||
      } else if (char_len < from.length()) {
 | 
					 | 
				
			||||||
        //do nothing
 | 
					 | 
				
			||||||
      } else if (char_len > from.length()) {
 | 
					 | 
				
			||||||
        buf_start += next_char_len;
 | 
					 | 
				
			||||||
        char_len = 0;
 | 
					 | 
				
			||||||
        next_char_len = 0;
 | 
					 | 
				
			||||||
      } else if (0 == MEMCMP(buf_start, from.ptr(), char_len)) {
 | 
					 | 
				
			||||||
        ret = locations.push_back(buf_start - text.ptr());
 | 
					 | 
				
			||||||
        buf_start += char_len;
 | 
					 | 
				
			||||||
        char_len = 0;
 | 
					 | 
				
			||||||
        next_char_len = 0;
 | 
					 | 
				
			||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
        buf_start += next_char_len;
 | 
					        //do nothing
 | 
				
			||||||
        char_len = 0;
 | 
					 | 
				
			||||||
        next_char_len = 0;
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int64_t tot_length = 0;
 | 
					    int64_t tot_length = 0;
 | 
				
			||||||
    if (OB_UNLIKELY(error != 0)) {
 | 
					    if (OB_FAIL(ret)) {
 | 
				
			||||||
    } else if (OB_FAIL(ret)) {
 | 
					 | 
				
			||||||
      ret_str.reset();
 | 
					      ret_str.reset();
 | 
				
			||||||
    } else if (locations.count() == 0) {
 | 
					    } else if (locations.count() == 0) {
 | 
				
			||||||
      ret_str = text;
 | 
					      ret_str = text;
 | 
				
			||||||
@ -253,7 +232,7 @@ int ObExprReplace::eval_replace(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
 | 
				
			|||||||
    expr_datum.set_datum(*text);
 | 
					    expr_datum.set_datum(*text);
 | 
				
			||||||
  } else if (!is_lob_res) { // non text tc inputs
 | 
					  } else if (!is_lob_res) { // non text tc inputs
 | 
				
			||||||
    if (OB_FAIL(replace(res,
 | 
					    if (OB_FAIL(replace(res,
 | 
				
			||||||
                        expr.args_[0]->datum_meta_.cs_type_,
 | 
					                        expr.datum_meta_.cs_type_,
 | 
				
			||||||
                        text->get_string(),
 | 
					                        text->get_string(),
 | 
				
			||||||
                        !from->is_null() ? from->get_string() : ObString(),
 | 
					                        !from->is_null() ? from->get_string() : ObString(),
 | 
				
			||||||
                        (NULL != to && !to->is_null()) ? to->get_string() : ObString(),
 | 
					                        (NULL != to && !to->is_null()) ? to->get_string() : ObString(),
 | 
				
			||||||
@ -288,7 +267,7 @@ int ObExprReplace::eval_replace(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &exp
 | 
				
			|||||||
      LOG_WARN("failed to get string data", K(ret), K(expr.args_[2]->datum_meta_));
 | 
					      LOG_WARN("failed to get string data", K(ret), K(expr.args_[2]->datum_meta_));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if (OB_SUCC(ret)) {
 | 
					    if (OB_SUCC(ret)) {
 | 
				
			||||||
      if (OB_FAIL(replace(res, expr.args_[0]->datum_meta_.cs_type_, text_data, from_data, to_data, temp_allocator))) {
 | 
					      if (OB_FAIL(replace(res, expr.datum_meta_.cs_type_, text_data, from_data, to_data, temp_allocator))) {
 | 
				
			||||||
        LOG_WARN("do replace for lob resutl failed", K(ret), K(expr.datum_meta_.type_));
 | 
					        LOG_WARN("do replace for lob resutl failed", K(ret), K(expr.datum_meta_.type_));
 | 
				
			||||||
      } else if (OB_FAIL(ObTextStringHelper::string_to_templob_result(expr, ctx, expr_datum, res))) {
 | 
					      } else if (OB_FAIL(ObTextStringHelper::string_to_templob_result(expr, ctx, expr_datum, res))) {
 | 
				
			||||||
        LOG_WARN("set lob result failed", K(ret));
 | 
					        LOG_WARN("set lob result failed", K(ret));
 | 
				
			||||||
 | 
				
			|||||||
@ -93,7 +93,8 @@ int ObCreateTableStmt::get_first_stmt(ObString &first_stmt)
 | 
				
			|||||||
                                                  first_stmt,
 | 
					                                                  first_stmt,
 | 
				
			||||||
                                                  get_query_ctx()->get_sql_stmt_coll_type(),
 | 
					                                                  get_query_ctx()->get_sql_stmt_coll_type(),
 | 
				
			||||||
                                                  ObCharset::get_system_collation(),
 | 
					                                                  ObCharset::get_system_collation(),
 | 
				
			||||||
                                                  first_stmt))) {
 | 
					                                                  first_stmt,
 | 
				
			||||||
 | 
					                                                  ObCharset::REPLACE_UNKNOWN_CHARACTER))) {
 | 
				
			||||||
      LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
 | 
					      LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
 | 
				
			||||||
               "stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
 | 
					               "stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -34,7 +34,8 @@ int ObDDLStmt::get_first_stmt(ObString &first_stmt)
 | 
				
			|||||||
                                                first_stmt,
 | 
					                                                first_stmt,
 | 
				
			||||||
                                                get_query_ctx()->get_sql_stmt_coll_type(),
 | 
					                                                get_query_ctx()->get_sql_stmt_coll_type(),
 | 
				
			||||||
                                                ObCharset::get_system_collation(),
 | 
					                                                ObCharset::get_system_collation(),
 | 
				
			||||||
                                                first_stmt))) {
 | 
					                                                first_stmt,
 | 
				
			||||||
 | 
					                                                ObCharset::REPLACE_UNKNOWN_CHARACTER))) {
 | 
				
			||||||
    LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
 | 
					    LOG_WARN("fail to convert charset", K(ret), K(first_stmt),
 | 
				
			||||||
             "stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
 | 
					             "stmt collation type", get_query_ctx()->get_sql_stmt_coll_type());
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user