// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "es_scan_node.h" #include #include #include #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Exprs_types.h" #include "runtime/runtime_state.h" #include "runtime/row_batch.h" #include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "runtime/client_cache.h" #include "util/runtime_profile.h" #include "util/debug_util.h" #include "service/backend_options.h" #include "olap/olap_common.h" #include "olap/utils.h" #include "exprs/expr_context.h" #include "exprs/expr.h" #include "exprs/in_predicate.h" #include "exprs/slot_ref.h" namespace doris { // $0 = column type (e.g. INT) const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. " "Expected value of type $0 based on column metadata. This likely indicates a " "problem with the data source library."; const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate " "$1 bytes for $2."; EsScanNode::EsScanNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ScanNode(pool, tnode, descs), _tuple_id(tnode.es_scan_node.tuple_id), _scan_range_idx(0) { if (tnode.es_scan_node.__isset.properties) { _properties = tnode.es_scan_node.properties; } } EsScanNode::~EsScanNode() { } Status EsScanNode::prepare(RuntimeState* state) { VLOG(1) << "EsScanNode::Prepare"; RETURN_IF_ERROR(ScanNode::prepare(state)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_tuple_desc == nullptr) { std::stringstream ss; ss << "es tuple descriptor is null, _tuple_id=" << _tuple_id; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } _env = state->exec_env(); return Status::OK(); } Status EsScanNode::open(RuntimeState* state) { VLOG(1) << "EsScanNode::Open"; RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN)); RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::open(state)); // TExtOpenParams.row_schema vector cols; for (const SlotDescriptor* slot : _tuple_desc->slots()) { TExtColumnDesc col; col.__set_name(slot->col_name()); col.__set_type(slot->type().to_thrift()); cols.emplace_back(std::move(col)); } TExtTableSchema row_schema; row_schema.cols = std::move(cols); row_schema.__isset.cols = true; // TExtOpenParams.predicates vector > predicates; vector predicate_to_conjunct; for (int i = 0; i < _conjunct_ctxs.size(); ++i) { VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string(); vector disjuncts; if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) { predicates.emplace_back(std::move(disjuncts)); predicate_to_conjunct.push_back(i); } } // open every scan range vector conjunct_accepted_times(_conjunct_ctxs.size(), 0); for (int i = 0; i < _scan_ranges.size(); ++i) { TEsScanRange& es_scan_range = _scan_ranges[i]; if (es_scan_range.es_hosts.empty()) { std::stringstream ss; ss << "es fail to open: hosts empty"; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } // TExtOpenParams TExtOpenParams params; params.__set_query_id(state->query_id()); _properties["index"] = es_scan_range.index; if (es_scan_range.__isset.type) { _properties["type"] = es_scan_range.type; } _properties["shard_id"] = std::to_string(es_scan_range.shard_id); params.__set_properties(_properties); params.__set_row_schema(row_schema); params.__set_batch_size(state->batch_size()); params.__set_predicates(predicates); TExtOpenResult result; // choose an es node, local is the first choice std::string localhost = BackendOptions::get_localhost(); bool is_success = false; for (int j = 0; j < 2; ++j) { for (auto& es_host : es_scan_range.es_hosts) { if ((j == 0 && es_host.hostname != localhost) || (j == 1 && es_host.hostname == localhost)) { continue; } Status status = open_es(es_host, result, params); if (status.ok()) { is_success = true; _addresses.push_back(es_host); _scan_handles.push_back(result.scan_handle); if (result.__isset.accepted_conjuncts) { for (int index : result.accepted_conjuncts) { conjunct_accepted_times[predicate_to_conjunct[index]]++; } } break; } else if (status.code() == TStatusCode::ES_SHARD_NOT_FOUND) { // if shard not found, try other nodes LOG(WARNING) << "shard not found on es node: " << ", address=" << es_host << ", scan_range_idx=" << i << ", try other nodes"; } else { LOG(WARNING) << "es open error: scan_range_idx=" << i << ", address=" << es_host << ", msg=" << status.get_error_msg(); return status; } } if (is_success) { break; } } if (!is_success) { std::stringstream ss; ss << "es open error: scan_range_idx=" << i << ", can't find shard on any node"; return Status::InternalError(ss.str()); } } // remove those conjuncts that accepted by all scan ranges for (int i = predicate_to_conjunct.size() - 1; i >= 0; i--) { int conjunct_index = predicate_to_conjunct[i]; if (conjunct_accepted_times[conjunct_index] == _scan_ranges.size()) { _pushdown_conjunct_ctxs.push_back(*(_conjunct_ctxs.begin() + conjunct_index)); _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index); } } for (int i = 0; i < _conjunct_ctxs.size(); ++i) { if (!check_left_conjuncts(_conjunct_ctxs[i]->root())) { return Status::InternalError("esquery could only be executed on es, but could not push down to es"); } } return Status::OK(); } Status EsScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { VLOG(1) << "EsScanNode::GetNext"; RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT)); RETURN_IF_CANCELLED(state); SCOPED_TIMER(_runtime_profile->total_time_counter()); SCOPED_TIMER(materialize_tuple_timer()); // create tuple MemPool* tuple_pool = row_batch->tuple_data_pool(); int64_t tuple_buffer_size; uint8_t* tuple_buffer = nullptr; RETURN_IF_ERROR(row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buffer_size, &tuple_buffer)); Tuple* tuple = reinterpret_cast(tuple_buffer); // get batch TExtGetNextResult result; RETURN_IF_ERROR(get_next_from_es(result)); _offsets[_scan_range_idx] += result.rows.num_rows; // convert VLOG(1) << "begin to convert: scan_range_idx=" << _scan_range_idx << ", num_rows=" << result.rows.num_rows; vector& cols = result.rows.cols; // indexes of the next non-null value in the row batch, per column. vector cols_next_val_idx(_tuple_desc->slots().size(), 0); for (int row_idx = 0; row_idx < result.rows.num_rows; row_idx++) { if (reached_limit()) { *eos = true; break; } RETURN_IF_ERROR(materialize_row(tuple_pool, tuple, cols, row_idx, cols_next_val_idx)); TupleRow* tuple_row = row_batch->get_row(row_batch->add_row()); tuple_row->set_tuple(0, tuple); if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), tuple_row)) { row_batch->commit_last_row(); tuple = reinterpret_cast( reinterpret_cast(tuple) + _tuple_desc->byte_size()); ++_num_rows_returned; } } VLOG(1) << "finish one batch: num_rows=" << row_batch->num_rows(); COUNTER_SET(_rows_returned_counter, _num_rows_returned); if (result.__isset.eos && result.eos) { VLOG(1) << "es finish one scan_range: scan_range_idx=" << _scan_range_idx; ++_scan_range_idx; } if (_scan_range_idx == _scan_ranges.size()) { *eos = true; } return Status::OK(); } Status EsScanNode::close(RuntimeState* state) { if (is_closed()) return Status::OK(); VLOG(1) << "EsScanNode::Close"; RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE)); SCOPED_TIMER(_runtime_profile->total_time_counter()); Expr::close(_pushdown_conjunct_ctxs, state); RETURN_IF_ERROR(ExecNode::close(state)); for (int i = 0; i < _addresses.size(); ++i) { TExtCloseParams params; params.__set_scan_handle(_scan_handles[i]); TExtCloseResult result; #ifndef BE_TEST const TNetworkAddress& address = _addresses[i]; try { Status status; ExtDataSourceServiceClientCache* client_cache = _env->extdatasource_client_cache(); ExtDataSourceServiceConnection client(client_cache, address, 10000, &status); if (!status.ok()) { LOG(WARNING) << "es create client error: scan_range_idx=" << i << ", address=" << address << ", msg=" << status.get_error_msg(); return status; } try { VLOG(1) << "es close param=" << apache::thrift::ThriftDebugString(params); client->close(result, params); } catch (apache::thrift::transport::TTransportException& e) { LOG(WARNING) << "es close retrying, because: " << e.what(); RETURN_IF_ERROR(client.reopen()); client->close(result, params); } } catch (apache::thrift::TException &e) { std::stringstream ss; ss << "es close error: scan_range_idx=" << i << ", msg=" << e.what(); LOG(WARNING) << ss.str(); return Status::ThriftRpcError(ss.str()); } VLOG(1) << "es close result=" << apache::thrift::ThriftDebugString(result); Status status(result.status); if (!status.ok()) { LOG(WARNING) << "es close error: : scan_range_idx=" << i << ", msg=" << status.get_error_msg(); return status; } #else TStatus status; result.__set_status(status); #endif } return Status::OK(); } void EsScanNode::debug_string(int indentation_level, stringstream* out) const { *out << string(indentation_level * 2, ' '); *out << "EsScanNode(tupleid=" << _tuple_id; *out << ")" << std::endl; for (int i = 0; i < _children.size(); ++i) { _children[i]->debug_string(indentation_level + 1, out); } } Status EsScanNode::set_scan_ranges(const vector& scan_ranges) { for (int i = 0; i < scan_ranges.size(); ++i) { TScanRangeParams scan_range = scan_ranges[i]; DCHECK(scan_range.scan_range.__isset.es_scan_range); TEsScanRange es_scan_range = scan_range.scan_range.es_scan_range; _scan_ranges.push_back(es_scan_range); } _offsets.resize(scan_ranges.size(), 0); return Status::OK(); } Status EsScanNode::open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params) { VLOG(1) << "es open param=" << apache::thrift::ThriftDebugString(params); #ifndef BE_TEST try { ExtDataSourceServiceClientCache* client_cache = _env->extdatasource_client_cache(); Status status; ExtDataSourceServiceConnection client(client_cache, address, 10000, &status); if (!status.ok()) { std::stringstream ss; ss << "es create client error: address=" << address << ", msg=" << status.get_error_msg(); return Status::InternalError(ss.str()); } try { client->open(result, params); } catch (apache::thrift::transport::TTransportException& e) { LOG(WARNING) << "es open retrying, because: " << e.what(); RETURN_IF_ERROR(client.reopen()); client->open(result, params); } VLOG(1) << "es open result=" << apache::thrift::ThriftDebugString(result); return Status(result.status); } catch (apache::thrift::TException &e) { std::stringstream ss; ss << "es open error: address=" << address << ", msg=" << e.what(); return Status::InternalError(ss.str()); } #else TStatus status; result.__set_status(status); result.__set_scan_handle("0"); return Status(status); #endif } // legacy conjuncts must not contain match function bool EsScanNode::check_left_conjuncts(Expr* conjunct) { if (is_match_func(conjunct)) { return false; } else { int num_children = conjunct->get_num_children(); for (int child_idx = 0; child_idx < num_children; ++child_idx) { if (!check_left_conjuncts(conjunct->get_child(child_idx))) { return false; } } return true; } } bool EsScanNode::ignore_cast(SlotDescriptor* slot, Expr* expr) { if (slot->type().is_date_type() && expr->type().is_date_type()) { return true; } if (slot->type().is_string_type() && expr->type().is_string_type()) { return true; } return false; } bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct, vector& disjuncts) { if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { if (conjunct->children().size() != 2) { VLOG(1) << "get disjuncts fail: number of childs is not 2"; return false; } SlotRef* slotRef; TExprOpcode::type op; Expr* expr; if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { expr = conjunct->get_child(1); slotRef = (SlotRef*)(conjunct->get_child(0)); op = conjunct->op(); } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { expr = conjunct->get_child(0); slotRef = (SlotRef*)(conjunct->get_child(1)); op = conjunct->op(); } else { VLOG(1) << "get disjuncts fail: no SLOT_REF child"; return false; } SlotDescriptor* slot_desc = get_slot_desc(slotRef); if (slot_desc == nullptr) { VLOG(1) << "get disjuncts fail: slot_desc is null"; return false; } TExtLiteral literal; if (!to_ext_literal(context, expr, &literal)) { VLOG(1) << "get disjuncts fail: can't get literal, node_type=" << expr->node_type(); return false; } TExtColumnDesc columnDesc; columnDesc.__set_name(slot_desc->col_name()); columnDesc.__set_type(slot_desc->type().to_thrift()); TExtBinaryPredicate binaryPredicate; binaryPredicate.__set_col(columnDesc); binaryPredicate.__set_op(op); binaryPredicate.__set_value(std::move(literal)); TExtPredicate predicate; predicate.__set_node_type(TExprNodeType::BINARY_PRED); predicate.__set_binary_predicate(binaryPredicate); disjuncts.push_back(std::move(predicate)); return true; } else if (is_match_func(conjunct)) { // if this is a function call expr and function name is match, then push // down it to es TExtFunction match_function; match_function.__set_func_name(conjunct->fn().name.function_name); vector query_conditions; TExtLiteral literal; if (!to_ext_literal(context, conjunct->get_child(1), &literal)) { VLOG(1) << "get disjuncts fail: can't get literal, node_type=" << conjunct->get_child(1)->node_type(); return false; } query_conditions.push_back(std::move(literal)); match_function.__set_values(query_conditions); TExtPredicate predicate; predicate.__set_node_type(TExprNodeType::FUNCTION_CALL); predicate.__set_ext_function(match_function); disjuncts.push_back(std::move(predicate)); return true; } else if (TExprNodeType::IN_PRED == conjunct->node_type()) { // the op code maybe FILTER_NEW_IN, it means there is function in list // like col_a in (abs(1)) if (TExprOpcode::FILTER_IN != conjunct->op() && TExprOpcode::FILTER_NOT_IN != conjunct->op()) { return false; } TExtInPredicate ext_in_predicate; vector in_pred_values; InPredicate* pred = dynamic_cast(conjunct); ext_in_predicate.__set_is_not_in(pred->is_not_in()); if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { return false; } SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0)); SlotDescriptor* slot_desc = get_slot_desc(slot_ref); if (slot_desc == nullptr) { return false; } TExtColumnDesc columnDesc; columnDesc.__set_name(slot_desc->col_name()); columnDesc.__set_type(slot_desc->type().to_thrift()); ext_in_predicate.__set_col(columnDesc); if (pred->get_child(0)->type().type != slot_desc->type().type) { if (!ignore_cast(slot_desc, pred->get_child(0))) { return false; } } HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin(); while (iter->has_next()) { if (nullptr == iter->get_value()) { return false; } TExtLiteral literal; if (!to_ext_literal(slot_desc->type().type, const_cast(iter->get_value()), &literal)) { VLOG(1) << "get disjuncts fail: can't get literal, node_type=" << slot_desc->type().type; return false; } in_pred_values.push_back(literal); iter->next(); } ext_in_predicate.__set_values(in_pred_values); TExtPredicate predicate; predicate.__set_node_type(TExprNodeType::IN_PRED); predicate.__set_in_predicate(ext_in_predicate); disjuncts.push_back(std::move(predicate)); return true; } else if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) { if (TExprOpcode::COMPOUND_OR != conjunct->op()) { VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR"; return false; } if (!get_disjuncts(context, conjunct->get_child(0), disjuncts)) { return false; } if (!get_disjuncts(context, conjunct->get_child(1), disjuncts)) { return false; } return true; } else { VLOG(1) << "get disjuncts fail: node type is " << conjunct->node_type() << ", should be BINARY_PRED or COMPOUND_PRED"; return false; } } bool EsScanNode::is_match_func(Expr* conjunct) { if (TExprNodeType::FUNCTION_CALL == conjunct->node_type() && conjunct->fn().name.function_name == "esquery") { return true; } return false; } SlotDescriptor* EsScanNode::get_slot_desc(SlotRef* slotRef) { std::vector slot_ids; slotRef->get_slot_ids(&slot_ids); SlotDescriptor* slot_desc = nullptr; for (SlotDescriptor* slot : _tuple_desc->slots()) { if (slot->id() == slot_ids[0]) { slot_desc = slot; break; } } return slot_desc; } bool EsScanNode::to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal) { switch (expr->node_type()) { case TExprNodeType::BOOL_LITERAL: case TExprNodeType::INT_LITERAL: case TExprNodeType::LARGE_INT_LITERAL: case TExprNodeType::FLOAT_LITERAL: case TExprNodeType::DECIMAL_LITERAL: case TExprNodeType::STRING_LITERAL: case TExprNodeType::DATE_LITERAL: return to_ext_literal(expr->type().type, context->get_value(expr, NULL), literal); default: return false; } } bool EsScanNode::to_ext_literal(PrimitiveType slot_type, void* value, TExtLiteral* literal) { TExprNodeType::type node_type; switch (slot_type) { case TYPE_BOOLEAN: { node_type = (TExprNodeType::BOOL_LITERAL); TBoolLiteral bool_literal; bool_literal.__set_value(*reinterpret_cast(value)); literal->__set_bool_literal(bool_literal); break; } case TYPE_TINYINT: { node_type = (TExprNodeType::INT_LITERAL); TIntLiteral int_literal; int_literal.__set_value(*reinterpret_cast(value)); literal->__set_int_literal(int_literal); break; } case TYPE_SMALLINT: { node_type = (TExprNodeType::INT_LITERAL); TIntLiteral int_literal; int_literal.__set_value(*reinterpret_cast(value)); literal->__set_int_literal(int_literal); break; } case TYPE_INT: { node_type = (TExprNodeType::INT_LITERAL); TIntLiteral int_literal; int_literal.__set_value(*reinterpret_cast(value)); literal->__set_int_literal(int_literal); break; } case TYPE_BIGINT: { node_type = (TExprNodeType::INT_LITERAL); TIntLiteral int_literal; int_literal.__set_value(*reinterpret_cast(value)); literal->__set_int_literal(int_literal); break; } case TYPE_LARGEINT: { node_type = (TExprNodeType::LARGE_INT_LITERAL); char buf[48]; int len = 48; char* v = LargeIntValue::to_string(*reinterpret_cast<__int128*>(value), buf, &len); TLargeIntLiteral large_int_literal; large_int_literal.__set_value(v); literal->__set_large_int_literal(large_int_literal); break; } case TYPE_FLOAT: { node_type = (TExprNodeType::FLOAT_LITERAL); TFloatLiteral float_literal; float_literal.__set_value(*reinterpret_cast(value)); literal->__set_float_literal(float_literal); break; } case TYPE_DOUBLE: { node_type = (TExprNodeType::FLOAT_LITERAL); TFloatLiteral float_literal; float_literal.__set_value(*reinterpret_cast(value)); literal->__set_float_literal(float_literal); break; } case TYPE_DECIMAL: { node_type = (TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; decimal_literal.__set_value(reinterpret_cast(value)->to_string()); literal->__set_decimal_literal(decimal_literal); break; } case TYPE_DATE: case TYPE_DATETIME: { node_type = (TExprNodeType::DATE_LITERAL); const DateTimeValue date_value = *reinterpret_cast(value); char str[MAX_DTVALUE_STR_LEN]; date_value.to_string(str); TDateLiteral date_literal; date_literal.__set_value(str); literal->__set_date_literal(date_literal); break; } case TYPE_CHAR: case TYPE_VARCHAR: { node_type = (TExprNodeType::STRING_LITERAL); TStringLiteral string_literal; string_literal.__set_value((reinterpret_cast(value))->debug_string()); literal->__set_string_literal(string_literal); break; } default: { DCHECK(false) << "Invalid type."; return false; } } literal->__set_node_type(node_type); return true; } Status EsScanNode::get_next_from_es(TExtGetNextResult& result) { TExtGetNextParams params; params.__set_scan_handle(_scan_handles[_scan_range_idx]); params.__set_offset(_offsets[_scan_range_idx]); // getNext const TNetworkAddress &address = _addresses[_scan_range_idx]; #ifndef BE_TEST try { Status create_client_status; ExtDataSourceServiceClientCache *client_cache = _env->extdatasource_client_cache(); ExtDataSourceServiceConnection client(client_cache, address, 10000, &create_client_status); if (!create_client_status.ok()) { LOG(WARNING) << "es create client error: scan_range_idx=" << _scan_range_idx << ", address=" << address << ", msg=" << create_client_status.get_error_msg(); return create_client_status; } try { VLOG(1) << "es get_next param=" << apache::thrift::ThriftDebugString(params); client->getNext(result, params); } catch (apache::thrift::transport::TTransportException& e) { std::stringstream ss; ss << "es get_next error: scan_range_idx=" << _scan_range_idx << ", msg=" << e.what(); LOG(WARNING) << ss.str(); RETURN_IF_ERROR(client.reopen()); return Status::ThriftRpcError(ss.str()); } } catch (apache::thrift::TException &e) { std::stringstream ss; ss << "es get_next error: scan_range_idx=" << _scan_range_idx << ", msg=" << e.what(); LOG(WARNING) << ss.str(); return Status::ThriftRpcError(ss.str()); } #else TStatus status; result.__set_status(status); result.__set_eos(true); TExtColumnData col_data; std::vector is_null; is_null.push_back(false); col_data.__set_is_null(is_null); std::vector int_vals; int_vals.push_back(1); int_vals.push_back(2); col_data.__set_int_vals(int_vals); std::vector cols; cols.push_back(col_data); TExtRowBatch rows; rows.__set_cols(cols); rows.__set_num_rows(2); result.__set_rows(rows); return Status(status); #endif // check result VLOG(1) << "es get_next result=" << apache::thrift::ThriftDebugString(result); Status get_next_status(result.status); if (!get_next_status.ok()) { LOG(WARNING) << "es get_next error: scan_range_idx=" << _scan_range_idx << ", address=" << address << ", msg=" << get_next_status.get_error_msg(); return get_next_status; } if (!result.__isset.rows || !result.rows.__isset.num_rows) { std::stringstream ss; ss << "es get_next error: scan_range_idx=" << _scan_range_idx << ", msg=rows or num_rows not in result"; LOG(WARNING) << ss.str(); return Status::InternalError(ss.str()); } return Status::OK(); } Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple, const vector& cols, int row_idx, vector& cols_next_val_idx) { tuple->init(_tuple_desc->byte_size()); for (int i = 0; i < _tuple_desc->slots().size(); ++i) { const SlotDescriptor* slot_desc = _tuple_desc->slots()[i]; if (!slot_desc->is_materialized()) { continue; } void* slot = tuple->get_slot(slot_desc->tuple_offset()); const TExtColumnData& col = cols[i]; if (col.is_null[row_idx]) { tuple->set_null(slot_desc->null_indicator_offset()); continue; } else { tuple->set_not_null(slot_desc->null_indicator_offset()); } int val_idx = cols_next_val_idx[i]++; switch (slot_desc->type().type) { case TYPE_CHAR: case TYPE_VARCHAR: { if (val_idx >= col.string_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); } const string& val = col.string_vals[val_idx]; size_t val_size = val.size(); char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); if (UNLIKELY(buffer == NULL)) { string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size); } memcpy(buffer, val.data(), val_size); reinterpret_cast(slot)->ptr = buffer; reinterpret_cast(slot)->len = val_size; break; } case TYPE_TINYINT: if (val_idx >= col.byte_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT")); } *reinterpret_cast(slot) = col.byte_vals[val_idx]; break; case TYPE_SMALLINT: if (val_idx >= col.short_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT")); } *reinterpret_cast(slot) = col.short_vals[val_idx]; break; case TYPE_INT: if (val_idx >= col.int_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "INT")); } *reinterpret_cast(slot) = col.int_vals[val_idx]; break; case TYPE_BIGINT: if (val_idx >= col.long_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT")); } *reinterpret_cast(slot) = col.long_vals[val_idx]; break; case TYPE_LARGEINT: if (val_idx >= col.long_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT")); } *reinterpret_cast(slot) = col.long_vals[val_idx]; break; case TYPE_DOUBLE: if (val_idx >= col.double_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE")); } *reinterpret_cast(slot) = col.double_vals[val_idx]; break; case TYPE_FLOAT: if (val_idx >= col.double_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT")); } *reinterpret_cast(slot) = col.double_vals[val_idx]; break; case TYPE_BOOLEAN: if (val_idx >= col.bool_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN")); } *reinterpret_cast(slot) = col.bool_vals[val_idx]; break; case TYPE_DATE: if (val_idx >= col.long_vals.size() || !reinterpret_cast(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE")); } reinterpret_cast(slot)->cast_to_date(); break; case TYPE_DATETIME: { if (val_idx >= col.long_vals.size() || !reinterpret_cast(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME")); } reinterpret_cast(slot)->set_type(TIME_DATETIME); break; } case TYPE_DECIMAL: { if (val_idx >= col.binary_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DECIMAL")); } const string& val = col.binary_vals[val_idx]; *reinterpret_cast(slot) = *reinterpret_cast(&val); break; } default: DCHECK(false); } } return Status::OK(); } }