/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. * This file is for func xmlelement. */ #define USING_LOG_PREFIX SQL_ENG #include "ob_expr_xml_element.h" #include "sql/engine/ob_exec_context.h" #ifdef OB_BUILD_ORACLE_XML #include "lib/xml/ob_xml_util.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" #endif using namespace oceanbase::common; using namespace oceanbase::sql; namespace oceanbase { namespace sql { ObExprXmlElement::ObExprXmlElement(ObIAllocator &alloc) : ObFuncExprOperator(alloc, T_FUN_SYS_XML_ELEMENT, N_XML_ELEMENT, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) { } ObExprXmlElement::~ObExprXmlElement() { } int ObExprXmlElement::calc_result_typeN(ObExprResType& type, ObExprResType* types_stack, int64_t param_num, ObExprTypeCtx& type_ctx) const { UNUSED(type_ctx); int ret = OB_SUCCESS; if (OB_UNLIKELY(param_num < 3)) { ret = OB_ERR_PARAM_SIZE; LOG_WARN("invalid param number", K(ret), K(param_num)); } else if (!is_called_in_sql()) { ret = OB_ERR_SP_LILABEL_MISMATCH; LOG_WARN("expr call in pl semantics disallowed", K(ret), K(N_XML_ELEMENT)); LOG_USER_ERROR(OB_ERR_SP_LILABEL_MISMATCH, static_cast(strlen(N_XML_ELEMENT)), N_XML_ELEMENT); } else { // check opt_escaping if (!ob_is_integer_type(types_stack[0].get_type())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid escaping opt type", K(ret), K(types_stack[0].get_type())); } else { for (int i = 2; i < param_num && OB_SUCC(ret); i++) { const ObObjType obj_type = types_stack[i].get_type(); if (i == 2) { if (ob_is_null(obj_type)) { // do nothing } else if (obj_type == ObNumberType) { // do nothing } else if (types_stack[i].get_collation_type() == CS_TYPE_BINARY) { ret = OB_ERR_INVALID_XML_DATATYPE; LOG_USER_ERROR(OB_ERR_INVALID_XML_DATATYPE, "Character", "-"); LOG_WARN("Unsupport for string typee with binary charset input.", K(ret), K(obj_type)); } else if (ob_is_string_tc(obj_type)) { types_stack[i].set_calc_type(ObVarcharType); types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); } else { ret = OB_ERR_INVALID_XML_DATATYPE; LOG_USER_ERROR(OB_ERR_INVALID_XML_DATATYPE, "Character", "-"); LOG_WARN("Unsupport for string typee with binary charset input.", K(ret), K(obj_type)); } } else if (i == 3 && ob_is_json(obj_type)) { // do nothing, result from xmlAttributes } else if (ob_is_string_type(obj_type)) { if (types_stack[i].get_collation_type() == CS_TYPE_BINARY) { types_stack[i].set_calc_collation_type(CS_TYPE_BINARY); } else if (types_stack[i].get_charset_type() != CHARSET_UTF8MB4) { types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } } else if (ObUserDefinedSQLType == types_stack[i].get_type()) { // xmltype types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } else if (ObExtendType == types_stack[i].get_type()) { types_stack[i].set_calc_type(ObUserDefinedSQLType); types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } else { types_stack[i].set_calc_type(ObVarcharType); types_stack[i].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } } } } if (OB_SUCC(ret)) { type.set_sql_udt(ObXMLSqlType); } return ret; } #ifdef OB_BUILD_ORACLE_XML int ObExprXmlElement::eval_xml_element(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) { INIT_SUCC(ret); ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); ObDatum *datum = NULL; int num_args = expr.arg_cnt_; ObString name_tag; int need_escape = 0; int is_name = 0; ObVector value_vec; const ObIJsonBase *attr_json = NULL; ObString binary_str; ObString blob_locator; bool has_attribute = false; ObXmlElement *element = NULL; ObXmlDocument *res_doc = NULL; ObMulModeMemCtx* mem_ctx = nullptr; lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(ObXMLExprHelper::get_tenant_id(ctx.exec_ctx_.get_my_session()), "XMLModule")); if (OB_ISNULL(ctx.exec_ctx_.get_my_session())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get session failed.", K(ret)); } else if (OB_FAIL(ObXmlUtil::create_mulmode_tree_context(&tmp_allocator, mem_ctx))) { LOG_WARN("fail to create tree memory context", K(ret)); } else if (OB_UNLIKELY(num_args < 3)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid args num", K(ret), K(num_args)); } else { expr.args_[0]->eval(ctx, datum); need_escape = datum->get_int(); expr.args_[1]->eval(ctx, datum); is_name = datum->get_int(); if (need_escape != 0 && need_escape != 1) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid escaping opt", K(ret), K(need_escape)); } else if (need_escape == 0 && is_name == 0) { ret = OB_ERR_XML_MISSING_COMMA; LOG_WARN("xml element param invalid", K(ret)); } else if (OB_FAIL(expr.args_[2]->eval(ctx, datum))) { LOG_WARN("expr args 1 failed", K(ret), K(expr.args_[2])); } else if(expr.args_[2]->datum_meta_.type_ == ObNumberType) { ret = OB_ERR_INVALID_XML_DATATYPE; LOG_USER_ERROR(OB_ERR_INVALID_XML_DATATYPE, "Character", "-"); LOG_WARN("Unsupport for string typee with binary charset input.", K(ret), K(expr.args_[2]->datum_meta_.type_)); } else if (OB_FAIL(ObTextStringHelper::get_string(expr, tmp_allocator, 2, datum, name_tag))) { LOG_WARN("get xml plain text failed", K(ret)); } } for (int i = 3; OB_SUCC(ret) && i < num_args && OB_SUCC(ret); i++) { ObObjType val_type = expr.args_[i]->datum_meta_.type_; ObItemType item_type = expr.args_[i]->type_; if (OB_FAIL(expr.args_[i]->eval(ctx, datum))) { LOG_WARN("expr args failed", K(ret), K(i)); } else if (ob_is_json(val_type) && i == 3) { // result from attribute has_attribute = true; void *buf = NULL; if (OB_ISNULL(buf = tmp_allocator.alloc(sizeof(ObJsonBin)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret), K(sizeof(ObJsonBin))); } else { ObJsonBin *j_bin = new (buf) ObJsonBin(datum->get_string().ptr(), datum->get_string().length(), &tmp_allocator); attr_json = static_cast(j_bin); if (OB_FAIL(j_bin->reset_iter())) { LOG_WARN("fail to reset iter", K(ret)); } else if (attr_json->json_type() != ObJsonNodeType::J_ARRAY) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid json type", K(ret), K(attr_json->json_type())); } } } else { // element value ObString xml_value_data = datum->get_string(); ObExpr *xml_arg = expr.args_[i]; bool validity = false; if (val_type == ObUserDefinedSQLType) { // xmltype if (OB_FAIL(ObTextStringHelper::read_real_string_data(&tmp_allocator, ObObjType::ObLongTextType, xml_arg->datum_meta_.cs_type_, true, xml_value_data))) { LOG_WARN("fail to get real data.", K(ret), K(xml_value_data)); // } else if (OB_FAIL(ObXMLExprHelper::check_xml_document_unparsed(mem_ctx, xml_value_data, validity))) { // LOG_WARN("check document unparsed failed", K(ret), K(xml_value_data)); // } else if (!validity) { // ret = OB_ERR_XML_PARSE; // LOG_WARN("input a unparsed document and parsing failed", K(ret), K(i), K(xml_value_data)); } else { ObObj temp_value; temp_value.set_string(ObUserDefinedSQLType, xml_value_data); value_vec.push_back(temp_value); } } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, *datum, xml_arg->datum_meta_, xml_arg->obj_meta_.has_lob_header(), xml_value_data))) { LOG_WARN("fail to get real data.", K(ret), K(xml_value_data)); } else if (expr.args_[i]->datum_meta_.cs_type_ == CS_TYPE_BINARY) { // binary const ObObjMeta obj_meta = expr.args_[i]->obj_meta_; ObObj obj; ObObj tmp_result; ObCastCtx cast_ctx(&tmp_allocator, NULL, CM_NONE, CS_TYPE_INVALID); obj.set_string(ObVarcharType, xml_value_data); if (OB_FAIL(ObHexUtils::rawtohex(obj, cast_ctx, tmp_result))) { LOG_WARN("fail to check xml binary syntax", K(ret)); } else if (OB_FAIL(construct_value_array(tmp_allocator, tmp_result.get_string(), value_vec))) { LOG_WARN("construct value array failed", K(ret)); } } else { // varchar if (need_escape) { ObStringBuffer *escape_value = nullptr; if (OB_ISNULL(escape_value = OB_NEWx(ObStringBuffer, &tmp_allocator, (&tmp_allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate buffer", K(ret)); } else if (OB_FAIL(ObXmlParserUtils::escape_xml_text(xml_value_data, *escape_value))) { LOG_WARN("escape xml value failed", K(ret), K(need_escape)); } else if (OB_FAIL(construct_value_array(tmp_allocator, ObString(escape_value->length(), escape_value->ptr()), value_vec))) { LOG_WARN("construct value array failed", K(ret)); } } else if (OB_FAIL(construct_value_array(tmp_allocator, xml_value_data, value_vec))) { LOG_WARN("construct value array failed", K(ret)); } } } } ObStringBuffer tmp_buff(&tmp_allocator); ObString text_xml; ObXmlDocument *xml_doc = nullptr; bool tag_validity = false; bool doc_validity = true; ObXmlText *tag_name_start; ObXmlText *tag_name_end; ObString start_tag("<>"); ObString end_tag(""); if (OB_FAIL(ret)) { } else if (!has_attribute && name_tag.empty()) { if (OB_ISNULL(tag_name_start = OB_NEWx(ObXmlText, mem_ctx->allocator_, ObMulModeNodeType::M_TEXT, mem_ctx))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("new xml text failed", K(ret)); } else if (OB_FALSE_IT(tag_name_start->set_text(start_tag))) { } else if (OB_ISNULL(tag_name_end = OB_NEWx(ObXmlText, mem_ctx->allocator_, ObMulModeNodeType::M_TEXT, mem_ctx))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("new xml text failed", K(ret)); } else if (OB_FALSE_IT(tag_name_end->set_text(end_tag))) { } } if (OB_FAIL(ret)) { LOG_WARN("ret failed", K(ret)); } else if (OB_ISNULL(element = OB_NEWx(ObXmlElement, (mem_ctx->allocator_), ObMulModeNodeType::M_ELEMENT, mem_ctx))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate mem failed", K(ret)); } else if (!has_attribute && name_tag.empty() && OB_FAIL(element->add_element(tag_name_start))) { LOG_WARN("element add start element failed", K(ret)); } else if (OB_FAIL(construct_element(mem_ctx, name_tag, value_vec, attr_json, element, tag_validity))) { LOG_WARN("construct_element failed", K(ret)); } else if (!has_attribute && name_tag.empty() && OB_FAIL(element->add_element(tag_name_end))) { LOG_WARN("element add end element failed", K(ret)); } else if (OB_ISNULL(res_doc = OB_NEWx(ObXmlDocument, (mem_ctx->allocator_), ObMulModeNodeType::M_UNPARSED, mem_ctx))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate mem failed", K(ret)); } else if (OB_FAIL(res_doc->add_element(element))) { LOG_WARN("res doc add element failed", K(ret)); } else if (tag_validity && element->get_unparse() && OB_FAIL(ObXMLExprHelper::check_doc_validity(mem_ctx, res_doc, doc_validity))) { // && element.get_unparse() LOG_WARN("check doc validity failed", K(ret), K(tag_validity)); } else if ((!tag_validity || !doc_validity)) { res_doc->set_xml_type(ObMulModeNodeType::M_UNPARSED); } else if (tag_validity && doc_validity) { static_cast(res_doc->at(0))->set_unparse(0); res_doc->set_xml_type(ObMulModeNodeType::M_DOCUMENT); } if (OB_FAIL(ret)) { } else if (OB_ISNULL(res_doc)) { ret = OB_ERR_UNEXPECTED; LOG_WARN(" failed to pack result, as res_doc is nullptr", K(ret)); } else if (OB_FAIL(res_doc->get_raw_binary(binary_str, &tmp_allocator))) { LOG_WARN("get raw binary failed", K(ret)); } else if (OB_FAIL(ObXMLExprHelper::pack_binary_res(expr, ctx, binary_str, blob_locator))) { LOG_WARN("pack binary res failed", K(ret), K(binary_str)); } else { res.set_string(blob_locator.ptr(), blob_locator.length()); } return ret; } int ObExprXmlElement::construct_value_array(ObIAllocator &allocator, const ObString &value, ObVector &res_value) { INIT_SUCC(ret); if (value.empty()) { // donothing } else { ObObj temp_value; uint32_t vec_size = res_value.size(); if (res_value.size() == 0) { temp_value.set_string(ObVarcharType, value); } else { if (res_value[vec_size - 1].get_type() == ObUserDefinedSQLType) { temp_value.set_string(ObVarcharType, value); } else { char *new_value = NULL; ObString temp_from = res_value[vec_size - 1].get_string(); if (OB_ISNULL(new_value = static_cast(allocator.alloc(temp_from.length() + value.length())))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("new value allocator failed", K(ret)); } else if (OB_FAIL(res_value.remove(res_value.last()))) { LOG_WARN("res value remove failed", K(ret)); } else { MEMCPY(new_value, temp_from.ptr(), temp_from.length()); MEMCPY(new_value + temp_from.length(), value.ptr(), value.length()); temp_value.set_string(ObVarcharType, new_value, temp_from.length() + value.length()); } } } if (OB_SUCC(ret)) { res_value.push_back(temp_value); } } return ret; } int ObExprXmlElement::construct_attribute(ObMulModeMemCtx* mem_ctx, const ObIJsonBase *attr, ObXmlElement *&element) { int ret = OB_SUCCESS; if (OB_ISNULL(attr)) { // do nothing } else if (attr->json_type() != ObJsonNodeType::J_ARRAY) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid json type", K(ret), K(attr->json_type())); } else if (attr->element_count() % 2 != 0) { ret = OB_ERR_UNEXPECTED; LOG_WARN("attribute element count invalid", K(ret), K(attr->element_count())); } else { for (uint64_t i = 0; OB_SUCC(ret) && i < attr->element_count(); i += 2) { ObIJsonBase *jb_name = NULL; ObIJsonBase *jb_value = NULL; ObString value_str = NULL; ObString key_str = NULL; ObXmlAttribute *attribute = NULL; if (OB_FAIL(attr->get_array_element(i, jb_value))) { LOG_WARN("get attribute value failed", K(ret), K(i)); } else if (OB_FAIL(attr->get_array_element(i + 1, jb_name))) { LOG_WARN("get attribute name failed", K(ret), K(i)); } else if (OB_ISNULL(jb_name) || OB_ISNULL(jb_value)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("attribute name or value is null", K(ret), K(i), K(jb_name), K(jb_value)); } else if (jb_name->json_type() == ObJsonNodeType::J_NULL || jb_value->json_type() == ObJsonNodeType::J_NULL) { LOG_DEBUG("name or content is null", K(jb_name->json_type()), K(jb_value->json_type())); } else if (OB_FAIL(ob_write_string(*mem_ctx->allocator_, ObString(jb_value->get_data_length(), jb_value->get_data()), value_str, false))) { LOG_WARN("write string value to string failed", K(ret), K(i), K(jb_name), K(jb_value)); } else if (OB_FAIL(ob_write_string(*mem_ctx->allocator_, ObString(jb_name->get_data_length(), jb_name->get_data()), key_str, false))) { LOG_WARN("write string key to string failed", K(ret), K(i), K(jb_name), K(jb_value)); } else if (OB_FAIL(element->add_attr_by_str(key_str, value_str, ObMulModeNodeType::M_ATTRIBUTE))) { LOG_WARN("add element failed", K(ret)); } } } return ret; } int ObExprXmlElement::construct_element_children(ObMulModeMemCtx* mem_ctx, ObVector &value_vec, ObXmlElement *&element, ObXmlElement *valid_ele) { int ret = OB_SUCCESS; ObXmlText *tag_value = NULL; if (OB_ISNULL(element)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("element node is NULL", K(ret)); } // build xml text for (int i = 0; OB_SUCC(ret) && i < value_vec.size(); i++) { ObObj value = value_vec[i]; ObIMulModeBase *node = NULL; ObXmlDocument *doc_node = NULL; if (value.get_type() == ObUserDefinedSQLType) { if (OB_FAIL(ObXMLExprHelper::add_binary_to_element(mem_ctx, value.get_string(), *element))) { LOG_WARN("add binary to element failed", K(ret), K(i)); } } else { if (OB_ISNULL(tag_value = OB_NEWx(ObXmlText, mem_ctx->allocator_, ObMulModeNodeType::M_TEXT, mem_ctx))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("new xml text failed", K(ret)); } else if (OB_FALSE_IT(tag_value->set_text(value.get_string()))) { } else if (OB_FAIL(element->add_element(tag_value))) { LOG_WARN("element add element failed", K(ret)); } else { element->set_unparse(1); } } } // if the constructed element is not unparsed, use valid_ele to replace the element if (!element->get_unparse()) { if (OB_ISNULL(valid_ele)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("valid element is NULL, but validity is true", K(ret)); } ObXmlNode *xml_node = NULL; for (int64_t i = 0; OB_SUCC(ret) && i < element->size(); i++) { if (OB_ISNULL(xml_node = element->at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("xml node is NULL", K(ret)); } else if (OB_FAIL(valid_ele->add_element(xml_node))) { LOG_WARN("fail to add element", K(ret)); } } // end for if (OB_SUCC(ret)) { element = valid_ele; } } return ret; } int ObExprXmlElement::construct_element(ObMulModeMemCtx* mem_ctx, const ObString &name, ObVector &value_vec, const ObIJsonBase *attr, ObXmlElement *&element, bool &validity) { INIT_SUCC(ret); ObXmlElement *valid_ele = NULL; if (OB_ISNULL(element)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("element node is NULL", K(ret)); } else if (OB_FAIL(element->init())) { LOG_WARN("element init failed", K(ret)); } else if (FALSE_IT(element->set_key(name))) { } else if (OB_FAIL(element->alter_member_sort_policy(false))) { LOG_WARN("fail to sort child element", K(ret)); } else if (OB_FAIL(construct_attribute(mem_ctx, attr, element))) { LOG_WARN("fail to construct attribute", K(ret)); } else if (OB_FAIL(ObXMLExprHelper::check_element_validity(mem_ctx, element, valid_ele, validity))) { LOG_WARN("check element validity failed", K(ret)); } else if (OB_FAIL(construct_element_children(mem_ctx, value_vec, element, valid_ele))) { LOG_WARN("fail to construct element chidren", K(ret)); } // set sort flag if (OB_SUCC(ret) && OB_FAIL(element->alter_member_sort_policy(true))) { LOG_WARN("fail to sort child element", K(ret)); } return ret; } #endif int ObExprXmlElement::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { UNUSED(expr_cg_ctx); UNUSED(raw_expr); rt_expr.eval_func_ = eval_xml_element; return OB_SUCCESS; } } }