// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include "exprs/cast_functions.h" #include "gen_cpp/Descriptors_types.h" #include "gen_cpp/PlanNodes_types.h" #include "olap/push_handler.h" #include "olap/row.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/user_function_cache.h" namespace doris { class PushHandlerTest : public testing::Test { public: PushHandlerTest() { init(); } static void SetUpTestCase() { UserFunctionCache::instance()->init("./be/test/runtime/test_data/user_function_cache/normal"); CastFunctions::init(); } protected: virtual void SetUp() {} virtual void TearDown() {} private: void init(); Schema create_schema(); int create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id); int create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id); void create_expr_info(); TDescriptorTable init_desc_table(); TDescriptorTable _t_desc_table; TBrokerScanRangeParams _params; }; Schema PushHandlerTest::create_schema() { std::vector columns; columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_INT, true); columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_SMALLINT, true); columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_VARCHAR, true); columns.emplace_back(OLAP_FIELD_AGGREGATION_SUM, OLAP_FIELD_TYPE_BIGINT, true); Schema schema(columns, 3); return schema; } #define TUPLE_ID_DST 0 #define TUPLE_ID_SRC 1 #define CLOMN_NUMBERS 4 #define DST_TUPLE_SLOT_ID_START 1 #define SRC_TUPLE_SLOT_ID_START 5 int PushHandlerTest::create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id) { const char *clomnNames[] = {"k1_int", "k2_smallint", "k3_varchar", "v_bigint"}; for (int i = 0; i < CLOMN_NUMBERS; i++) { TSlotDescriptor slot_desc; slot_desc.id = next_slot_id++; slot_desc.parent = 1; TTypeDesc type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::VARCHAR); scalar_type.__set_len(65535); node.__set_scalar_type(scalar_type); type.types.push_back(node); } slot_desc.slotType = type; slot_desc.columnPos = i; slot_desc.byteOffset = i*16+8; // 8 bytes for null slot_desc.nullIndicatorBit = i%8; slot_desc.colName = clomnNames[i]; slot_desc.slotIdx = i + 1; slot_desc.isMaterialized = true; t_desc_table.slotDescriptors.push_back(slot_desc); } { // TTupleDescriptor source TTupleDescriptor t_tuple_desc; t_tuple_desc.id = TUPLE_ID_SRC; t_tuple_desc.byteSize = CLOMN_NUMBERS*16+8; // 8 bytes for null t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; t_desc_table.tupleDescriptors.push_back(t_tuple_desc); } return next_slot_id; } int PushHandlerTest::create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id) { {//k1_int TSlotDescriptor slot_desc; slot_desc.id = next_slot_id++; slot_desc.parent = 0; TTypeDesc type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::INT); node.__set_scalar_type(scalar_type); type.types.push_back(node); } slot_desc.slotType = type; slot_desc.columnPos = -1; slot_desc.byteOffset = 4; slot_desc.nullIndicatorByte = 0; slot_desc.nullIndicatorBit = 1; slot_desc.colName = "k1_int"; slot_desc.slotIdx = 1; slot_desc.isMaterialized = true; t_desc_table.slotDescriptors.push_back(slot_desc); } {// k2_smallint TSlotDescriptor slot_desc; slot_desc.id = next_slot_id++; slot_desc.parent = 0; TTypeDesc type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::SMALLINT); node.__set_scalar_type(scalar_type); type.types.push_back(node); } slot_desc.slotType = type; slot_desc.columnPos = -1; slot_desc.byteOffset = 2; slot_desc.nullIndicatorByte = 0; slot_desc.nullIndicatorBit = 0; slot_desc.colName = "k2_smallint"; slot_desc.slotIdx = 0; slot_desc.isMaterialized = true; t_desc_table.slotDescriptors.push_back(slot_desc); } {//k3_varchar TSlotDescriptor slot_desc; slot_desc.id = next_slot_id++; slot_desc.parent = 0; TTypeDesc type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::VARCHAR); scalar_type.__set_len(65535); node.__set_scalar_type(scalar_type); type.types.push_back(node); } slot_desc.slotType = type; slot_desc.columnPos = -1; slot_desc.byteOffset = 16; slot_desc.nullIndicatorByte = 0; slot_desc.nullIndicatorBit = 3; slot_desc.colName = "k3_varchar"; slot_desc.slotIdx = 3; slot_desc.isMaterialized = true; t_desc_table.slotDescriptors.push_back(slot_desc); } {// v_bigint TSlotDescriptor slot_desc; slot_desc.id = next_slot_id++; slot_desc.parent = 0; TTypeDesc type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::BIGINT); node.__set_scalar_type(scalar_type); type.types.push_back(node); } slot_desc.slotType = type; slot_desc.columnPos = -1; slot_desc.byteOffset = 8; slot_desc.nullIndicatorByte = 0; slot_desc.nullIndicatorBit = 2; slot_desc.colName = "v_bigint"; slot_desc.slotIdx = 2; slot_desc.isMaterialized = true; t_desc_table.slotDescriptors.push_back(slot_desc); } t_desc_table.__isset.slotDescriptors = true; { // TTupleDescriptor dest TTupleDescriptor t_tuple_desc; t_tuple_desc.id = TUPLE_ID_DST; t_tuple_desc.byteSize = 32; t_tuple_desc.numNullBytes = 1; t_tuple_desc.tableId = 0; t_tuple_desc.__isset.tableId = true; t_desc_table.tupleDescriptors.push_back(t_tuple_desc); } return next_slot_id; } TDescriptorTable PushHandlerTest::init_desc_table() { TDescriptorTable t_desc_table; int next_slot_id = 1; next_slot_id = create_dst_tuple(t_desc_table, next_slot_id); next_slot_id = create_src_tuple(t_desc_table, next_slot_id); return t_desc_table; } void PushHandlerTest::create_expr_info() { TTypeDesc varchar_type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::VARCHAR); scalar_type.__set_len(5000); node.__set_scalar_type(scalar_type); varchar_type.types.push_back(node); } // k1_int VARCHAR --> INT { TTypeDesc int_type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::INT); node.__set_scalar_type(scalar_type); int_type.types.push_back(node); } TExprNode cast_expr; cast_expr.node_type = TExprNodeType::CAST_EXPR; cast_expr.type = int_type; cast_expr.__set_opcode(TExprOpcode::CAST); cast_expr.__set_num_children(1); cast_expr.__set_output_scale(-1); cast_expr.__isset.fn = true; cast_expr.fn.name.function_name = "casttoint"; cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; cast_expr.fn.arg_types.push_back(varchar_type); cast_expr.fn.ret_type = int_type; cast_expr.fn.has_var_args = false; cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); cast_expr.fn.__isset.scalar_fn = true; cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_int_val"; TExprNode slot_ref; slot_ref.node_type = TExprNodeType::SLOT_REF; slot_ref.type = varchar_type; slot_ref.num_children = 0; slot_ref.__isset.slot_ref = true; slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START; slot_ref.slot_ref.tuple_id = 1; TExpr expr; expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START, expr); _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START); } // k2_smallint VARCHAR --> SMALLINT { TTypeDesc smallint_type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::SMALLINT); node.__set_scalar_type(scalar_type); smallint_type.types.push_back(node); } TExprNode cast_expr; cast_expr.node_type = TExprNodeType::CAST_EXPR; cast_expr.type = smallint_type; cast_expr.__set_opcode(TExprOpcode::CAST); cast_expr.__set_num_children(1); cast_expr.__set_output_scale(-1); cast_expr.__isset.fn = true; cast_expr.fn.name.function_name = "casttosmallint"; cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; cast_expr.fn.arg_types.push_back(varchar_type); cast_expr.fn.ret_type = smallint_type; cast_expr.fn.has_var_args = false; cast_expr.fn.__set_signature("casttosmallint(VARCHAR(*))"); cast_expr.fn.__isset.scalar_fn = true; cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_small_int_val"; TExprNode slot_ref; slot_ref.node_type = TExprNodeType::SLOT_REF; slot_ref.type = varchar_type; slot_ref.num_children = 0; slot_ref.__isset.slot_ref = true; slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 1; slot_ref.slot_ref.tuple_id = 1; TExpr expr; expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 1, expr); _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 1); } // k3_varchar VARCHAR --> VARCHAR { TExprNode slot_ref; slot_ref.node_type = TExprNodeType::SLOT_REF; slot_ref.type = varchar_type; slot_ref.num_children = 0; slot_ref.__isset.slot_ref = true; slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 2; slot_ref.slot_ref.tuple_id = 1; TExpr expr; expr.nodes.push_back(slot_ref); _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 2, expr); _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 2); } // v_bigint VARCHAR --> BIGINT { TTypeDesc bigint_type; { TTypeNode node; node.__set_type(TTypeNodeType::SCALAR); TScalarType scalar_type; scalar_type.__set_type(TPrimitiveType::BIGINT); node.__set_scalar_type(scalar_type); bigint_type.types.push_back(node); } TExprNode cast_expr; cast_expr.node_type = TExprNodeType::CAST_EXPR; cast_expr.type = bigint_type; cast_expr.__set_opcode(TExprOpcode::CAST); cast_expr.__set_num_children(1); cast_expr.__set_output_scale(-1); cast_expr.__isset.fn = true; cast_expr.fn.name.function_name = "casttobigint"; cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; cast_expr.fn.arg_types.push_back(varchar_type); cast_expr.fn.ret_type = bigint_type; cast_expr.fn.has_var_args = false; cast_expr.fn.__set_signature("casttobigint(VARCHAR(*))"); cast_expr.fn.__isset.scalar_fn = true; cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_big_int_val"; TExprNode slot_ref; slot_ref.node_type = TExprNodeType::SLOT_REF; slot_ref.type = varchar_type; slot_ref.num_children = 0; slot_ref.__isset.slot_ref = true; slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 3; slot_ref.slot_ref.tuple_id = 1; TExpr expr; expr.nodes.push_back(cast_expr); expr.nodes.push_back(slot_ref); _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 3, expr); _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 3); } _params.__set_dest_tuple_id(TUPLE_ID_DST); _params.__set_src_tuple_id(TUPLE_ID_SRC); } void PushHandlerTest::init() { create_expr_info(); _t_desc_table = init_desc_table(); } TEST_F(PushHandlerTest, PushBrokerReaderNormal) { TBrokerScanRange broker_scan_range; broker_scan_range.params = _params; TBrokerRangeDesc range; range.start_offset = 0; range.size = -1; range.format_type = TFileFormatType::FORMAT_PARQUET; range.splittable = false; range.path = "./be/test/olap/test_data/push_broker_reader.parquet"; range.file_type = TFileType::FILE_LOCAL; broker_scan_range.ranges.push_back(range); ExecEnv::GetInstance()->_thread_mgr = new ThreadResourceMgr(); Schema schema = create_schema(); // data // k1_int k2_smallint varchar bigint // 0 0 a0 0 // 0 2 a1 3 // 1 4 a2 6 PushBrokerReader reader; reader.init(&schema, broker_scan_range, _t_desc_table); uint8_t* tuple_buf = reader.mem_pool()->allocate(schema.schema_size()); ContiguousRow row(&schema, tuple_buf); // line 1 reader.next(&row); ASSERT_FALSE(reader.eof()); ASSERT_EQ(0, *(int32_t*)row.cell(0).cell_ptr()); ASSERT_EQ(0, *(int16_t*)row.cell(1).cell_ptr()); ASSERT_EQ("a0", ((Slice*)row.cell(2).cell_ptr())->to_string()); ASSERT_EQ(0, *(int64_t*)row.cell(3).cell_ptr()); // line 2 reader.next(&row); ASSERT_FALSE(reader.eof()); ASSERT_EQ(0, *(int32_t*)row.cell(0).cell_ptr()); ASSERT_EQ(2, *(int16_t*)row.cell(1).cell_ptr()); ASSERT_EQ("a1", ((Slice*)row.cell(2).cell_ptr())->to_string()); ASSERT_EQ(3, *(int64_t*)row.cell(3).cell_ptr()); // line 3 reader.next(&row); ASSERT_FALSE(reader.eof()); ASSERT_EQ(1, *(int32_t*)row.cell(0).cell_ptr()); ASSERT_EQ(4, *(int16_t*)row.cell(1).cell_ptr()); ASSERT_EQ("a2", ((Slice*)row.cell(2).cell_ptr())->to_string()); ASSERT_EQ(6, *(int64_t*)row.cell(3).cell_ptr()); // eof reader.next(&row); ASSERT_TRUE(reader.eof()); reader.close(); } } // namespace doris int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); CpuInfo::init(); return RUN_ALL_TESTS(); }