From 839ec45197fb4559c4946bf0fa4ce3b6805b4248 Mon Sep 17 00:00:00 2001 From: trueeyu Date: Thu, 20 Feb 2020 20:43:26 +0800 Subject: [PATCH] Remove llvm relative code from be/src/exec (#2955) Remove unused LLVM related codes of directory:be/src/exec (#2910) there are many LLVM related codes in code base, but these codes are not really used. The higher version of GCC is not compatible with the LLVM 3.4.2 version currently used by Doris. The PR delete all LLVM related code of directory: be/src/exec. --- be/src/exec/aggregation_node.cpp | 561 --------------- be/src/exec/aggregation_node.h | 24 - be/src/exec/blocking_join_node.cpp | 2 - be/src/exec/blocking_join_node.h | 2 - be/src/exec/exec_node.cpp | 120 ---- be/src/exec/exec_node.h | 12 - be/src/exec/hash_join_node.cpp | 245 ------- be/src/exec/hash_join_node.h | 20 - be/src/exec/hash_table.cpp | 499 -------------- be/src/exec/hash_table.h | 22 - be/src/exec/merge_join_node.h | 2 - .../exec/new_partitioned_aggregation_node.cc | 618 ----------------- .../exec/new_partitioned_aggregation_node.h | 41 -- be/src/exec/new_partitioned_hash_table.cc | 644 ------------------ be/src/exec/new_partitioned_hash_table.h | 37 - be/src/exec/olap_scan_node.cpp | 14 - be/src/exec/partitioned_aggregation_node.cc | 478 ------------- be/src/exec/partitioned_aggregation_node.h | 21 - be/src/exec/partitioned_hash_table.cc | 505 -------------- be/src/exec/partitioned_hash_table.h | 23 - be/src/exec/topn_node.cpp | 7 - be/src/exec/union_node.cpp | 49 -- be/src/exec/union_node.h | 1 - 23 files changed, 3947 deletions(-) diff --git a/be/src/exec/aggregation_node.cpp b/be/src/exec/aggregation_node.cpp index 1b3ff19147..1f50f46378 100644 --- a/be/src/exec/aggregation_node.cpp +++ b/be/src/exec/aggregation_node.cpp @@ -24,7 +24,6 @@ #include #include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exec/hash_table.hpp" #include "exprs/agg_fn_evaluator.h" #include "exprs/expr.h" @@ -41,17 +40,8 @@ #include "runtime/tuple_row.h" #include "util/runtime_profile.h" -using llvm::BasicBlock; -using llvm::Function; -using llvm::PointerType; -using llvm::Type; -using llvm::Value; -using llvm::StructType; - namespace doris { -const char* AggregationNode::_s_llvm_class_name = "class.doris::AggregationNode"; - // TODO: pass in maximum size; enforce by setting limit in mempool // TODO: have a Status ExecNode::init(const TPlanNode&) member function // that does initialization outside of c'tor, so we can indicate errors @@ -65,7 +55,6 @@ AggregationNode::AggregationNode( _singleton_output_tuple(NULL), //_tuple_pool(new MemPool()), // - _codegen_process_row_batch_fn(NULL), _process_row_batch_fn(NULL), _needs_finalize(tnode.agg_node.need_finalize), _build_timer(NULL), @@ -156,22 +145,6 @@ Status AggregationNode::prepare(RuntimeState* state) { _singleton_output_tuple = construct_intermediate_tuple(); } - if (state->codegen_level() > 0) { - LlvmCodeGen* codegen = NULL; - RETURN_IF_ERROR(state->get_codegen(&codegen)); - Function* update_tuple_fn = codegen_update_tuple(state); - if (update_tuple_fn != NULL) { - _codegen_process_row_batch_fn = - codegen_process_row_batch(state, update_tuple_fn); - if (_codegen_process_row_batch_fn != NULL) { - // Update to using codegen'd process row batch. - codegen->add_function_to_jit(_codegen_process_row_batch_fn, - reinterpret_cast(&_process_row_batch_fn)); - // AddRuntimeExecOption("Codegen Enabled"); - } - } - } - return Status::OK(); } @@ -481,539 +454,5 @@ void AggregationNode::push_down_predicate(RuntimeState *state, return; } -static IRFunction::Type get_hll_update_function2(const TypeDescriptor& type) { - switch (type.type) { - case TYPE_BOOLEAN: - return IRFunction::HLL_UPDATE_BOOLEAN; - case TYPE_TINYINT: - return IRFunction::HLL_UPDATE_TINYINT; - case TYPE_SMALLINT: - return IRFunction::HLL_UPDATE_SMALLINT; - case TYPE_INT: - return IRFunction::HLL_UPDATE_INT; - case TYPE_BIGINT: - return IRFunction::HLL_UPDATE_BIGINT; - case TYPE_FLOAT: - return IRFunction::HLL_UPDATE_FLOAT; - case TYPE_DOUBLE: - return IRFunction::HLL_UPDATE_DOUBLE; - case TYPE_CHAR: - case TYPE_VARCHAR: - return IRFunction::HLL_UPDATE_STRING; - case TYPE_DECIMAL: - return IRFunction::HLL_UPDATE_DECIMAL; - default: - DCHECK(false) << "Unsupported type: " << type; - return IRFunction::FN_END; - } -} - -// IR Generation for updating a single aggregation slot. Signature is: -// void update_slot(FunctionContext* fn_ctx, AggTuple* agg_tuple, char** row) -// -// The IR for sum(double_col) is: -// define void @update_slot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, double }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 128241264 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %dst_val = load double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 -// %1 = fadd double %dst_val, %val -// store double %1, double* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -// -// The IR for min(double_col) is: -// define void @update_slot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, double }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 128241264 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_is_null = call i8 @is_null(tuple); -// br i1 %dst_is_null, label dst_null, label dst_not_null -// -// dst_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %val = extractvalue { i8, double } %src, 1 -// store double %val, double* %dst_slot_ptr -// br label %ret -// -// dst_not_null: ; preds = %src_not_null -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %dst_val = load double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 -// %1 = fadd double %dst_val, %val -// store double %1, double* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -// The IR for ndv(double_col) is: -// define void @update_slot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, %"struct.doris::StringValue" }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %dst_lowered_ptr = alloca { i64, i8* } -// %src_lowered_ptr = alloca { i8, double } -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 120530832 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds -// { i8, %"struct.doris::StringValue" }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, %"struct.doris::StringValue" }* %agg_tuple) -// %dst_val = load %"struct.doris::StringValue"* %dst_slot_ptr -// store { i8, double } %src, { i8, double }* %src_lowered_ptr -// %src_unlowered_ptr = bitcast { i8, double }* %src_lowered_ptr -// to %"struct.doris_udf::DoubleVal"* -// %ptr = extractvalue %"struct.doris::StringValue" %dst_val, 0 -// %dst_stringval = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1 -// %len = extractvalue %"struct.doris::StringValue" %dst_val, 1 -// %1 = extractvalue { i64, i8* } %dst_stringval, 0 -// %2 = zext i32 %len to i64 -// %3 = shl i64 %2, 32 -// %4 = and i64 %1, 4294967295 -// %5 = or i64 %4, %3 -// %dst_stringval1 = insertvalue { i64, i8* } %dst_stringval, i64 %5, 0 -// store { i64, i8* } %dst_stringval1, { i64, i8* }* %dst_lowered_ptr -// %dst_unlowered_ptr = bitcast { i64, i8* }* %dst_lowered_ptr -// to %"struct.doris_udf::StringVal"* -// call void @HllUpdate(%"class.doris_udf::FunctionContext"* %fn_ctx, -// %"struct.doris_udf::DoubleVal"* %src_unlowered_ptr, -// %"struct.doris_udf::StringVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i8* }* %dst_lowered_ptr -// %6 = extractvalue { i64, i8* } %anyval_result, 1 -// %7 = insertvalue %"struct.doris::StringValue" zeroinitializer, i8* %6, 0 -// %8 = extractvalue { i64, i8* } %anyval_result, 0 -// %9 = ashr i64 %8, 32 -// %10 = trunc i64 %9 to i32 -// %11 = insertvalue %"struct.doris::StringValue" %7, i32 %10, 1 -// store %"struct.doris::StringValue" %11, %"struct.doris::StringValue"* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -llvm::Function* AggregationNode::codegen_update_slot( - RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc) { - DCHECK(slot_desc->is_materialized()); - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1); - ExprContext* input_expr_ctx = evaluator->input_expr_ctxs()[0]; - Expr* input_expr = input_expr_ctx->root(); - // TODO: implement timestamp - if (input_expr->type().type == TYPE_DATETIME - || input_expr->type().type == TYPE_DATE - || input_expr->type().type == TYPE_DECIMAL - || input_expr->type().is_string_type()) { - return NULL; - } - Function* agg_expr_fn = NULL; - Status status = input_expr->get_codegend_compute_fn(state, &agg_expr_fn); - if (!status.ok()) { - LOG(INFO) << "Could not codegen update_slot(): " << status.get_error_msg(); - return NULL; - } - DCHECK(agg_expr_fn != NULL); - - PointerType* fn_ctx_type = - codegen->get_ptr_type(FunctionContextImpl::_s_llvm_functioncontext_name); - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - PointerType* tuple_ptr_type = PointerType::get(tuple_struct, 0); - PointerType* tuple_row_ptr_type = codegen->get_ptr_type(TupleRow::_s_llvm_class_name); - - // Create update_slot prototype - LlvmCodeGen::FnPrototype prototype(codegen, "update_slot", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - Value* fn_ctx_arg = args[0]; - Value* agg_tuple_arg = args[1]; - Value* row_arg = args[2]; - - BasicBlock* src_not_null_block = NULL; - BasicBlock* dst_null_block = NULL; - BasicBlock* dst_not_null_block = NULL; - if (evaluator->agg_op() == AggFnEvaluator::MIN - || evaluator->agg_op() == AggFnEvaluator::MAX) { - src_not_null_block = BasicBlock::Create(codegen->context(), "src_not_null", fn); - dst_null_block = BasicBlock::Create(codegen->context(), "dst_null", fn); - } - dst_not_null_block = BasicBlock::Create(codegen->context(), "dst_not_null", fn); - BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", fn); - - // Call expr function to get src slot value - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), input_expr_ctx); - Value* agg_expr_fn_args[] = { ctx_arg, row_arg }; - CodegenAnyVal src = CodegenAnyVal::create_call_wrapped( - codegen, &builder, input_expr->type(), agg_expr_fn, agg_expr_fn_args, "src", NULL); - - Value* src_is_null = src.get_is_null(); - if (evaluator->agg_op() == AggFnEvaluator::MIN - || evaluator->agg_op() == AggFnEvaluator::MAX) { - builder.CreateCondBr(src_is_null, ret_block, src_not_null_block); - - // Src slot is not null - builder.SetInsertPoint(src_not_null_block); - Function* is_null_fn = slot_desc->codegen_is_null(codegen, tuple_struct); - Value* dst_is_null = builder.CreateCall(is_null_fn, agg_tuple_arg); - builder.CreateCondBr(dst_is_null, dst_null_block, dst_not_null_block); - // dst slot is null - builder.SetInsertPoint(dst_null_block); - Value* dst_ptr = - builder.CreateStructGEP(agg_tuple_arg, slot_desc->field_idx(), "dst_slot_ptr"); - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->codegen_update_null(codegen, tuple_struct, false); - builder.CreateCall(clear_null_fn, agg_tuple_arg); - } - builder.CreateStore(src.get_val(), dst_ptr); - builder.CreateBr(ret_block); - } else { - builder.CreateCondBr(src_is_null, ret_block, dst_not_null_block); - } - - - // Src slot is not null, update dst_slot - builder.SetInsertPoint(dst_not_null_block); - Value* dst_ptr = - builder.CreateStructGEP(agg_tuple_arg, slot_desc->field_idx(), "dst_slot_ptr"); - Value* result = NULL; - - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->codegen_update_null(codegen, tuple_struct, false); - builder.CreateCall(clear_null_fn, agg_tuple_arg); - } - - // Update the slot - Value* dst_value = builder.CreateLoad(dst_ptr, "dst_val"); - switch (evaluator->agg_op()) { - case AggFnEvaluator::COUNT: - if (evaluator->is_merge()) { - result = builder.CreateAdd(dst_value, src.get_val(), "count_sum"); - } else { - result = builder.CreateAdd( - dst_value, codegen->get_int_constant(TYPE_BIGINT, 1), "count_inc"); - } - break; - case AggFnEvaluator::MIN: { - Function* min_fn = codegen->codegen_min_max(slot_desc->type(), true); - Value* min_args[] = { dst_value, src.get_val() }; - result = builder.CreateCall(min_fn, min_args, "min_value"); - break; - } - case AggFnEvaluator::MAX: { - Function* max_fn = codegen->codegen_min_max(slot_desc->type(), false); - Value* max_args[] = { dst_value, src.get_val() }; - result = builder.CreateCall(max_fn, max_args, "max_value"); - break; - } - case AggFnEvaluator::SUM: - if (slot_desc->type().type == TYPE_FLOAT || slot_desc->type().type == TYPE_DOUBLE) { - result = builder.CreateFAdd(dst_value, src.get_val()); - } else { - result = builder.CreateAdd(dst_value, src.get_val()); - } - break; - case AggFnEvaluator::NDV: { - DCHECK_EQ(slot_desc->type().type, TYPE_VARCHAR); - IRFunction::Type ir_function_type = evaluator->is_merge() ? IRFunction::HLL_MERGE - : get_hll_update_function2(input_expr->type()); - Function* hll_fn = codegen->get_function(ir_function_type); - - // Create pointer to src_anyval to pass to HllUpdate() function. We must use the - // unlowered type. - Value* src_lowered_ptr = codegen->create_entry_block_alloca( - fn, LlvmCodeGen::NamedVariable("src_lowered_ptr", src.value()->getType())); - builder.CreateStore(src.value(), src_lowered_ptr); - Type* unlowered_ptr_type = - CodegenAnyVal::get_unlowered_type(codegen, input_expr->type())->getPointerTo(); - Value* src_unlowered_ptr = - builder.CreateBitCast(src_lowered_ptr, unlowered_ptr_type, "src_unlowered_ptr"); - - // Create StringVal* intermediate argument from dst_value - CodegenAnyVal dst_stringval = CodegenAnyVal::get_non_null_val( - codegen, &builder, TypeDescriptor(TYPE_VARCHAR), "dst_stringval"); - dst_stringval.set_from_raw_value(dst_value); - // Create pointer to dst_stringval to pass to HllUpdate() function. We must use - // the unlowered type. - Value* dst_lowered_ptr = codegen->create_entry_block_alloca( - fn, LlvmCodeGen::NamedVariable("dst_lowered_ptr", - dst_stringval.value()->getType())); - builder.CreateStore(dst_stringval.value(), dst_lowered_ptr); - unlowered_ptr_type = - codegen->get_ptr_type(CodegenAnyVal::get_unlowered_type( - codegen, TypeDescriptor(TYPE_VARCHAR))); - Value* dst_unlowered_ptr = - builder.CreateBitCast(dst_lowered_ptr, unlowered_ptr_type, "dst_unlowered_ptr"); - - // Call 'hll_fn' - builder.CreateCall3(hll_fn, fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr); - - // Convert StringVal intermediate 'dst_arg' back to StringValue - Value* anyval_result = builder.CreateLoad(dst_lowered_ptr, "anyval_result"); - result = CodegenAnyVal(codegen, &builder, TypeDescriptor(TYPE_VARCHAR), anyval_result) - .to_native_value(); - break; - } - default: - DCHECK(false) << "bad aggregate operator: " << evaluator->agg_op(); - } - - builder.CreateStore(result, dst_ptr); - builder.CreateBr(ret_block); - - builder.SetInsertPoint(ret_block); - builder.CreateRetVoid(); - - fn = codegen->finalize_function(fn); - return fn; -} - -// IR codegen for the update_tuple loop. This loop is query specific and -// based on the aggregate functions. The function signature must match the non- -// codegen'd update_tuple exactly. -// For the query: -// select count(*), count(int_col), sum(double_col) the IR looks like: -// -// define void @update_tuple(%"class.doris::AggregationNode"* %this_ptr, -// %"class.doris::Tuple"* %agg_tuple, -// %"class.doris::TupleRow"* %tuple_row) #20 { -// entry: -// %tuple = bitcast %"class.doris::Tuple"* %agg_tuple to { i8, i64, i64, double }* -// %src_slot = getelementptr inbounds { i8, i64, i64, double }* %tuple, i32 0, i32 1 -// %count_star_val = load i64* %src_slot -// %count_star_inc = add i64 %count_star_val, 1 -// store i64 %count_star_inc, i64* %src_slot -// call void @update_slot(%"class.doris_udf::FunctionContext"* inttoptr -// (i64 44521296 to %"class.doris_udf::FunctionContext"*), -// { i8, i64, i64, double }* %tuple, -// %"class.doris::TupleRow"* %tuple_row) -// call void @UpdateSlot5(%"class.doris_udf::FunctionContext"* inttoptr -// (i64 44521328 to %"class.doris_udf::FunctionContext"*), -// { i8, i64, i64, double }* %tuple, -// %"class.doris::TupleRow"* %tuple_row) -// ret void -// } -Function* AggregationNode::codegen_update_tuple(RuntimeState* state) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - - int j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - - // Timestamp and char are never supported. NDV supports decimal and string but no - // other functions. - // TODO: the other aggregate functions might work with decimal as-is - // TODO(zc) - if (slot_desc->type().type == TYPE_DATETIME || slot_desc->type().type == TYPE_CHAR || - (evaluator->agg_op() != AggFnEvaluator::NDV && - (slot_desc->type().type == TYPE_DECIMAL || - slot_desc->type().type == TYPE_CHAR || - slot_desc->type().type == TYPE_VARCHAR))) { - LOG(INFO) << "Could not codegen UpdateIntermediateTuple because " - << "string, char, timestamp and decimal are not yet supported."; - return NULL; - } - if (evaluator->agg_op() == AggFnEvaluator::COUNT_DISTINCT - || evaluator->agg_op() == AggFnEvaluator::SUM_DISTINCT) { - return NULL; - } - - // Don't codegen things that aren't builtins (for now) - if (!evaluator->is_builtin()) { - return NULL; - } - } - - if (_intermediate_tuple_desc->generate_llvm_struct(codegen) == NULL) { - LOG(INFO) << "Could not codegen update_tuple because we could" - << "not generate a matching llvm struct for the intermediate tuple."; - return NULL; - } - - // Get the types to match the update_tuple signature - Type* agg_node_type = codegen->get_type(AggregationNode::_s_llvm_class_name); - Type* agg_tuple_type = codegen->get_type(Tuple::_s_llvm_class_name); - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - - DCHECK(agg_node_type != NULL); - DCHECK(agg_tuple_type != NULL); - DCHECK(tuple_row_type != NULL); - - PointerType* agg_node_ptr_type = PointerType::get(agg_node_type, 0); - PointerType* agg_tuple_ptr_type = PointerType::get(agg_tuple_type, 0); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - // Signature for update_tuple is - // void update_tuple(AggregationNode* this, Tuple* tuple, TupleRow* row) - // This signature needs to match the non-codegen'd signature exactly. - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - PointerType* tuple_ptr = PointerType::get(tuple_struct, 0); - LlvmCodeGen::FnPrototype prototype(codegen, "update_tuple", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_tuple", agg_tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("tuple_row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - - // Cast the parameter types to the internal llvm runtime types. - // TODO: get rid of this by using right type in function signature - args[1] = builder.CreateBitCast(args[1], tuple_ptr, "tuple"); - - // Loop over each expr and generate the IR for that slot. If the expr is not - // count(*), generate a helper IR function to update the slot and call that. - j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - if (evaluator->is_count_star()) { - // TODO: we should be able to hoist this up to the loop over the batch and just - // increment the slot by the number of rows in the batch. - int field_idx = slot_desc->field_idx(); - Value* const_one = codegen->get_int_constant(TYPE_BIGINT, 1); - Value* slot_ptr = builder.CreateStructGEP(args[1], field_idx, "src_slot"); - Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val"); - Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc"); - builder.CreateStore(count_inc, slot_ptr); - } else { - Function* update_slot_fn = codegen_update_slot(state, evaluator, slot_desc); - if (update_slot_fn == NULL) { - return NULL; - } - Value* fn_ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(FunctionContextImpl::_s_llvm_functioncontext_name), - _agg_fn_ctxs[i]); - builder.CreateCall3(update_slot_fn, fn_ctx_arg, args[1], args[2]); - } - } - builder.CreateRetVoid(); - - // CodegenProcessRowBatch() does the final optimizations. - return codegen->finalize_function(fn); -} - -Function* AggregationNode::codegen_process_row_batch( - RuntimeState* state, Function* update_tuple_fn) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - DCHECK(update_tuple_fn != NULL); - - // Get the cross compiled update row batch function - IRFunction::Type ir_fn = - (!_probe_expr_ctxs.empty() ? IRFunction::AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING - : IRFunction::AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING); - Function* process_batch_fn = codegen->get_function(ir_fn); - if (process_batch_fn == NULL) { - LOG(ERROR) << "Could not find AggregationNode::ProcessRowBatch in module."; - return NULL; - } - - int replaced = 0; - if (!_probe_expr_ctxs.empty()) { - // Aggregation w/o grouping does not use a hash table. - - // Codegen for hash - Function* hash_fn = _hash_tbl->codegen_hash_current_row(state); - if (hash_fn == NULL) { - return NULL; - } - - // Codegen HashTable::Equals - Function* equals_fn = _hash_tbl->codegen_equals(state); - if (equals_fn == NULL) { - return NULL; - } - - // Codegen for evaluating build rows - Function* eval_build_row_fn = _hash_tbl->codegen_eval_tuple_row(state, true); - if (eval_build_row_fn == NULL) { - return NULL; - } - - // Codegen for evaluating probe rows - Function* eval_probe_row_fn = _hash_tbl->codegen_eval_tuple_row(state, false); - if (eval_probe_row_fn == NULL) { - return NULL; - } - - // Replace call sites - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, eval_build_row_fn, "eval_build_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, eval_probe_row_fn, "eval_probe_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, hash_fn, "hash_current_row", &replaced); - DCHECK_EQ(replaced, 2); - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, equals_fn, "equals", &replaced); - DCHECK_EQ(replaced, 1); - } - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, update_tuple_fn, "update_tuple", &replaced); - DCHECK_EQ(replaced, 1) << "One call site should be replaced."; - DCHECK(process_batch_fn != NULL); - return codegen->optimize_function_with_exprs(process_batch_fn); -} } diff --git a/be/src/exec/aggregation_node.h b/be/src/exec/aggregation_node.h index 9100739221..72e9f0ad0b 100644 --- a/be/src/exec/aggregation_node.h +++ b/be/src/exec/aggregation_node.h @@ -28,14 +28,9 @@ #include "runtime/mem_pool.h" #include "runtime/string_value.h" -namespace llvm { -class Function; -} - namespace doris { class AggFnEvaluator; -class LlvmCodeGen; class RowBatch; class RuntimeState; struct StringValue; @@ -69,7 +64,6 @@ public: virtual void push_down_predicate( RuntimeState *state, std::list *expr_ctxs); - static const char* _s_llvm_class_name; private: boost::scoped_ptr _hash_tbl; HashTable::Iterator _output_iterator; @@ -98,9 +92,6 @@ private: Tuple* _singleton_output_tuple; // result of aggregation w/o GROUP BY boost::scoped_ptr _tuple_pool; - /// IR for process row batch. NULL if codegen is disabled. - llvm::Function* _codegen_process_row_batch_fn; - typedef void (*ProcessRowBatchFn)(AggregationNode*, RowBatch*); // Jitted ProcessRowBatch function pointer. Null if codegen is disabled. ProcessRowBatchFn _process_row_batch_fn; @@ -136,21 +127,6 @@ private: // Do the aggregation for all tuple rows in the batch void process_row_batch_no_grouping(RowBatch* batch, MemPool* pool); void process_row_batch_with_grouping(RowBatch* batch, MemPool* pool); - - /// Codegen the process row batch loop. The loop has already been compiled to - /// IR and loaded into the codegen object. UpdateAggTuple has also been - /// codegen'd to IR. This function will modify the loop subsituting the - /// UpdateAggTuple function call with the (inlined) codegen'd 'update_tuple_fn'. - llvm::Function* codegen_process_row_batch( - RuntimeState* state, llvm::Function* update_tuple_fn); - - /// Codegen for updating aggregate_exprs at slot_idx. Returns NULL if unsuccessful. - /// slot_idx is the idx into aggregate_exprs_ (does not include grouping exprs). - llvm::Function* codegen_update_slot( - RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc); - - /// Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful. - llvm::Function* codegen_update_tuple(RuntimeState* state); }; } diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index ed626966ce..e174973f1c 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -27,8 +27,6 @@ namespace doris { -const char* BlockingJoinNode::LLVM_CLASS_NAME = "class.doris::BlockingJoinNode"; - BlockingJoinNode::BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, ObjectPool* pool, diff --git a/be/src/exec/blocking_join_node.h b/be/src/exec/blocking_join_node.h index e1595c505d..964f996203 100644 --- a/be/src/exec/blocking_join_node.h +++ b/be/src/exec/blocking_join_node.h @@ -57,8 +57,6 @@ public: // BlockingJoinNode::close(). virtual Status close(RuntimeState* state); - static const char* LLVM_CLASS_NAME; - private: const std::string _node_name; TJoinOp::type _join_op; diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 6a75ce2af5..d8bd463216 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -21,7 +21,6 @@ #include #include -#include "codegen/llvm_codegen.h" #include "codegen/codegen_anyval.h" #include "common/object_pool.h" #include "common/status.h" @@ -62,13 +61,6 @@ #include "util/debug_util.h" #include "util/runtime_profile.h" -using llvm::Function; -using llvm::PointerType; -using llvm::Type; -using llvm::Value; -using llvm::LLVMContext; -using llvm::BasicBlock; - namespace doris { const std::string ExecNode::ROW_THROUGHPUT_COUNTER = "RowsReturnedRate"; @@ -586,118 +578,6 @@ Status ExecNode::exec_debug_action(TExecNodePhase::type phase) { return Status::OK(); } -// Codegen for EvalConjuncts. The generated signature is -// For a node with two conjunct predicates -// define i1 @EvalConjuncts(%"class.impala::ExprContext"** %ctxs, i32 %num_ctxs, -// %"class.impala::TupleRow"* %row) #20 { -// entry: -// %ctx_ptr = getelementptr %"class.impala::ExprContext"** %ctxs, i32 0 -// %ctx = load %"class.impala::ExprContext"** %ctx_ptr -// %result = call i16 @Eq_StringVal_StringValWrapper3( -// %"class.impala::ExprContext"* %ctx, %"class.impala::TupleRow"* %row) -// %is_null = trunc i16 %result to i1 -// %0 = ashr i16 %result, 8 -// %1 = trunc i16 %0 to i8 -// %val = trunc i8 %1 to i1 -// %is_false = xor i1 %val, true -// %return_false = or i1 %is_null, %is_false -// br i1 %return_false, label %false, label %continue -// -// continue: ; preds = %entry -// %ctx_ptr2 = getelementptr %"class.impala::ExprContext"** %ctxs, i32 1 -// %ctx3 = load %"class.impala::ExprContext"** %ctx_ptr2 -// %result4 = call i16 @Gt_BigIntVal_BigIntValWrapper5( -// %"class.impala::ExprContext"* %ctx3, %"class.impala::TupleRow"* %row) -// %is_null5 = trunc i16 %result4 to i1 -// %2 = ashr i16 %result4, 8 -// %3 = trunc i16 %2 to i8 -// %val6 = trunc i8 %3 to i1 -// %is_false7 = xor i1 %val6, true -// %return_false8 = or i1 %is_null5, %is_false7 -// br i1 %return_false8, label %false, label %continue1 -// -// continue1: ; preds = %continue -// ret i1 true -// -// false: ; preds = %continue, %entry -// ret i1 false -// } -Function* ExecNode::codegen_eval_conjuncts( - RuntimeState* state, const std::vector& conjunct_ctxs, const char* name) { - Function* conjunct_fns[conjunct_ctxs.size()]; - for (int i = 0; i < conjunct_ctxs.size(); ++i) { - Status status = - conjunct_ctxs[i]->root()->get_codegend_compute_fn(state, &conjunct_fns[i]); - if (!status.ok()) { - VLOG_QUERY << "Could not codegen EvalConjuncts: " << status.get_error_msg(); - return NULL; - } - } - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Construct function signature to match - // bool EvalConjuncts(Expr** exprs, int num_exprs, TupleRow* row) - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - Type* expr_ctx_type = codegen->get_type(ExprContext::_s_llvm_class_name); - - DCHECK(tuple_row_type != NULL); - DCHECK(expr_ctx_type != NULL); - - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - PointerType* expr_ctx_ptr_type = PointerType::get(expr_ctx_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, name, codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument( - LlvmCodeGen::NamedVariable("ctxs", PointerType::get(expr_ctx_ptr_type, 0))); - prototype.add_argument( - LlvmCodeGen::NamedVariable("num_ctxs", codegen->get_type(TYPE_INT))); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* ctxs_arg = args[0]; - Value* tuple_row_arg = args[2]; - - if (conjunct_ctxs.size() > 0) { - LLVMContext& context = codegen->context(); - BasicBlock* false_block = BasicBlock::Create(context, "false", fn); - - for (int i = 0; i < conjunct_ctxs.size(); ++i) { - BasicBlock* true_block = BasicBlock::Create(context, "continue", fn, false_block); - - Value* ctx_arg_ptr = builder.CreateConstGEP1_32(ctxs_arg, i, "ctx_ptr"); - Value* ctx_arg = builder.CreateLoad(ctx_arg_ptr, "ctx"); - Value* expr_args[] = { ctx_arg, tuple_row_arg }; - - // Call conjunct_fns[i] - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, conjunct_ctxs[i]->root()->type(), - conjunct_fns[i], expr_args, "result", NULL); - - // Return false if result.is_null || !result - Value* is_null = result.get_is_null(); - Value* is_false = builder.CreateNot(result.get_val(), "is_false"); - Value* return_false = builder.CreateOr(is_null, is_false, "return_false"); - builder.CreateCondBr(return_false, false_block, true_block); - - // Set insertion point for continue/end - builder.SetInsertPoint(true_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - } else { - builder.CreateRet(codegen->true_value()); - } - - return codegen->finalize_function(fn); -} - Status ExecNode::claim_buffer_reservation(RuntimeState* state) { DCHECK(!_buffer_pool_client.is_registered()); BufferPool* buffer_pool = ExecEnv::GetInstance()->buffer_pool(); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index e40a4d316a..ab51166084 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -33,10 +33,6 @@ #include "service/backend_options.h" #include "util/uid_util.h" // for print_id -namespace llvm { -class Function; -} - namespace doris { class Expr; @@ -134,14 +130,6 @@ public: // each implementation should start out by calling the default implementation. virtual Status close(RuntimeState* state); - llvm::Function* codegen_eval_conjuncts( - RuntimeState* state, const std::vector& conjunct_ctxs, const char* name); - - llvm::Function* codegen_eval_conjuncts( - RuntimeState* state, const std::vector& conjunct_ctxs) { - return codegen_eval_conjuncts(state, conjunct_ctxs, "EvalConjuncts"); - } - // Creates exec node tree from list of nodes contained in plan via depth-first // traversal. All nodes are placed in pool. // Returns error if 'plan' is corrupted, otherwise success. diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 084e8b9b38..c31e2ee462 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -19,7 +19,6 @@ #include -#include "codegen/llvm_codegen.h" #include "exec/hash_table.hpp" #include "exprs/expr.h" #include "exprs/in_predicate.h" @@ -29,21 +28,13 @@ #include "util/runtime_profile.h" #include "gen_cpp/PlanNodes_types.h" -using llvm::Function; -using llvm::PointerType; -using llvm::Type; -using llvm::Value; -using llvm::BasicBlock; -using llvm::LLVMContext; namespace doris { -const char* HashJoinNode::_s_llvm_class_name = "class.doris::HashJoinNode"; HashJoinNode::HashJoinNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), _join_op(tnode.hash_join_node.join_op), _probe_eos(false), - _codegen_process_build_batch_fn(NULL), _process_build_batch_fn(NULL), _process_probe_batch_fn(NULL), _anti_join_last_pos(NULL) { @@ -149,39 +140,6 @@ Status HashJoinNode::prepare(RuntimeState* state) { _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); - if (state->codegen_level() > 0) { - if (_join_op == TJoinOp::LEFT_ANTI_JOIN) { - return Status::OK(); - } - LlvmCodeGen* codegen = NULL; - RETURN_IF_ERROR(state->get_codegen(&codegen)); - - // Codegen for hashing rows - Function* hash_fn = _hash_tbl->codegen_hash_current_row(state); - if (hash_fn == NULL) { - return Status::OK(); - } - - // Codegen for build path - _codegen_process_build_batch_fn = codegen_process_build_batch(state, hash_fn); - if (_codegen_process_build_batch_fn != NULL) { - codegen->add_function_to_jit( - _codegen_process_build_batch_fn, - reinterpret_cast(&_process_build_batch_fn)); - // AddRuntimeExecOption("Build Side Codegen Enabled"); - } - - // Codegen for probe path (only for left joins) - if (!_match_all_build) { - Function* codegen_process_probe_batch_fn = codegen_process_probe_batch(state, hash_fn); - if (codegen_process_probe_batch_fn != NULL) { - codegen->add_function_to_jit(codegen_process_probe_batch_fn, - reinterpret_cast(&_process_probe_batch_fn)); - // AddRuntimeExecOption("Probe Side Codegen Enabled"); - } - } - } - return Status::OK(); } @@ -774,207 +732,4 @@ void HashJoinNode::create_output_row(TupleRow* out, TupleRow* probe, TupleRow* b } } -// This codegen'd function should only be used for left join cases so it assumes that -// the probe row is non-null. For a left outer join, the IR looks like: -// define void @CreateOutputRow(%"class.impala::HashBlockingNode"* %this_ptr, -// %"class.impala::TupleRow"* %out_arg, -// %"class.impala::TupleRow"* %probe_arg, -// %"class.impala::TupleRow"* %build_arg) { -// entry: -// %out = bitcast %"class.impala::TupleRow"* %out_arg to i8** -// %probe = bitcast %"class.impala::TupleRow"* %probe_arg to i8** -// %build = bitcast %"class.impala::TupleRow"* %build_arg to i8** -// %0 = bitcast i8** %out to i8* -// %1 = bitcast i8** %probe to i8* -// call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 16, i1 false) -// %is_build_null = icmp eq i8** %build, null -// br i1 %is_build_null, label %build_null, label %build_not_null -// -// build_not_null: ; preds = %entry -// %dst_tuple_ptr1 = getelementptr i8** %out, i32 1 -// %src_tuple_ptr = getelementptr i8** %build, i32 0 -// %2 = load i8** %src_tuple_ptr -// store i8* %2, i8** %dst_tuple_ptr1 -// ret void -// -// build_null: ; preds = %entry -// %dst_tuple_ptr = getelementptr i8** %out, i32 1 -// call void @llvm.memcpy.p0i8.p0i8.i32( -// i8* %dst_tuple_ptr, i8* %1, i32 16, i32 16, i1 false) -// ret void -// } -Function* HashJoinNode::codegen_create_output_row(LlvmCodeGen* codegen) { - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(HashJoinNode::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - // TupleRows are really just an array of pointers. Easier to work with them - // this way. - PointerType* tuple_row_working_type = PointerType::get(codegen->ptr_type(), 0); - - // Construct function signature to match CreateOutputRow() - LlvmCodeGen::FnPrototype prototype(codegen, "CreateOutputRow", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("out_arg", tuple_row_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("probe_arg", tuple_row_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("build_arg", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[4]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* out_row_arg = builder.CreateBitCast(args[1], tuple_row_working_type, "out"); - Value* probe_row_arg = builder.CreateBitCast(args[2], tuple_row_working_type, "probe"); - Value* build_row_arg = builder.CreateBitCast(args[3], tuple_row_working_type, "build"); - - int num_probe_tuples = child(0)->row_desc().tuple_descriptors().size(); - int num_build_tuples = child(1)->row_desc().tuple_descriptors().size(); - - // Copy probe row - codegen->codegen_memcpy(&builder, out_row_arg, probe_row_arg, _probe_tuple_row_size); - Value* build_row_idx[] = { codegen->get_int_constant(TYPE_INT, num_probe_tuples) }; - Value* build_row_dst = builder.CreateGEP(out_row_arg, build_row_idx, "build_dst_ptr"); - - // Copy build row. - BasicBlock* build_not_null_block = BasicBlock::Create(context, "build_not_null", fn); - BasicBlock* build_null_block = NULL; - - if (_match_all_probe) { - // build tuple can be null - build_null_block = BasicBlock::Create(context, "build_null", fn); - Value* is_build_null = builder.CreateIsNull(build_row_arg, "is_build_null"); - builder.CreateCondBr(is_build_null, build_null_block, build_not_null_block); - - // Set tuple build ptrs to NULL - // TODO: this should be replaced with memset() but I can't get the llvm intrinsic - // to work. - builder.SetInsertPoint(build_null_block); - for (int i = 0; i < num_build_tuples; ++i) { - Value* array_idx[] = - { codegen->get_int_constant(TYPE_INT, i + num_probe_tuples) }; - Value* dst = builder.CreateGEP(out_row_arg, array_idx, "dst_tuple_ptr"); - builder.CreateStore(codegen->null_ptr_value(), dst); - } - builder.CreateRetVoid(); - } else { - // build row can't be NULL - builder.CreateBr(build_not_null_block); - } - - // Copy build tuple ptrs - builder.SetInsertPoint(build_not_null_block); - codegen->codegen_memcpy(&builder, build_row_dst, build_row_arg, _build_tuple_row_size); - builder.CreateRetVoid(); - - return codegen->finalize_function(fn); -} - -Function* HashJoinNode::codegen_process_build_batch(RuntimeState* state, Function* hash_fn) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get cross compiled function - Function* process_build_batch_fn = codegen->get_function( - IRFunction::HASH_JOIN_PROCESS_BUILD_BATCH); - DCHECK(process_build_batch_fn != NULL); - - // Codegen for evaluating build rows - Function* eval_row_fn = _hash_tbl->codegen_eval_tuple_row(state, true); - if (eval_row_fn == NULL) { - return NULL; - } - - int replaced = 0; - // Replace call sites - process_build_batch_fn = codegen->replace_call_sites( - process_build_batch_fn, false, eval_row_fn, "eval_build_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_build_batch_fn = codegen->replace_call_sites( - process_build_batch_fn, false, hash_fn, "hash_current_row", &replaced); - DCHECK_EQ(replaced, 1); - - return codegen->optimize_function_with_exprs(process_build_batch_fn); -} - -Function* HashJoinNode::codegen_process_probe_batch(RuntimeState* state, Function* hash_fn) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get cross compiled function - Function* process_probe_batch_fn = - codegen->get_function(IRFunction::HASH_JOIN_PROCESS_PROBE_BATCH); - DCHECK(process_probe_batch_fn != NULL); - - // Codegen HashTable::Equals - Function* equals_fn = _hash_tbl->codegen_equals(state); - if (equals_fn == NULL) { - return NULL; - } - - // Codegen for evaluating build rows - Function* eval_row_fn = _hash_tbl->codegen_eval_tuple_row(state, false); - if (eval_row_fn == NULL) { - return NULL; - } - - // Codegen CreateOutputRow - Function* create_output_row_fn = codegen_create_output_row(codegen); - if (create_output_row_fn == NULL) { - return NULL; - } - - // Codegen evaluating other join conjuncts - Function* eval_other_conjuncts_fn = ExecNode::codegen_eval_conjuncts( - state, _other_join_conjunct_ctxs, "EvalOtherConjuncts"); - if (eval_other_conjuncts_fn == NULL) { - return NULL; - } - - // Codegen evaluating conjuncts - Function* eval_conjuncts_fn = ExecNode::codegen_eval_conjuncts(state, _conjunct_ctxs); - if (eval_conjuncts_fn == NULL) { - return NULL; - } - - // Replace all call sites with codegen version - int replaced = 0; - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, hash_fn, "hash_current_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, eval_row_fn, "eval_probe_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, create_output_row_fn, "create_output_row", &replaced); - // TODO(zc): add semi join - DCHECK_EQ(replaced, 2); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, eval_conjuncts_fn, "eval_conjuncts", &replaced); - DCHECK_EQ(replaced, 2); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, eval_other_conjuncts_fn, - "eval_other_join_conjuncts", &replaced); - // TODO(zc): add semi join - DCHECK_EQ(replaced, 1); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, equals_fn, "equals", &replaced); - DCHECK_EQ(replaced, 2); - - return codegen->optimize_function_with_exprs(process_probe_batch_fn); -} - } diff --git a/be/src/exec/hash_join_node.h b/be/src/exec/hash_join_node.h index b00c60881d..f4b703b7e8 100644 --- a/be/src/exec/hash_join_node.h +++ b/be/src/exec/hash_join_node.h @@ -58,8 +58,6 @@ public: virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); virtual Status close(RuntimeState* state); - static const char* _s_llvm_class_name; - protected: void debug_string(int indentation_level, std::stringstream* out) const; @@ -117,9 +115,6 @@ private: // This should be the same size as the probe tuple row. int _result_tuple_row_size; - /// llvm function for build batch - llvm::Function* _codegen_process_build_batch_fn; - // Function declaration for codegen'd function. Signature must match // HashJoinNode::ProcessBuildBatch typedef void (*ProcessBuildBatchFn)(HashJoinNode*, RowBatch*); @@ -178,21 +173,6 @@ private: // This is only used for debugging and outputting the left child rows before // doing the join. std::string get_probe_row_output_string(TupleRow* probe_row); - - /// Codegen function to create output row - llvm::Function* codegen_create_output_row(LlvmCodeGen* codegen); - - /// Codegen processing build batches. Identical signature to ProcessBuildBatch. - /// hash_fn is the codegen'd function for computing hashes over tuple rows in the - /// hash table. - /// Returns NULL if codegen was not possible. - llvm::Function* codegen_process_build_batch(RuntimeState* state, llvm::Function* hash_fn); - - /// Codegen processing probe batches. Identical signature to ProcessProbeBatch. - /// hash_fn is the codegen'd function for computing hashes over tuple rows in the - /// hash table. - /// Returns NULL if codegen was not possible. - llvm::Function* codegen_process_probe_batch(RuntimeState* state, llvm::Function* hash_fn); }; } diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 0c805f7cb8..975c493d6f 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -18,7 +18,6 @@ #include "exec/hash_table.hpp" #include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "runtime/raw_value.h" @@ -27,18 +26,9 @@ #include "runtime/runtime_state.h" #include "util/doris_metrics.h" -using llvm::BasicBlock; -using llvm::Value; -using llvm::Function; -using llvm::Type; -using llvm::PointerType; -using llvm::LLVMContext; -using llvm::PHINode; - namespace doris { const float HashTable::MAX_BUCKET_OCCUPANCY_FRACTION = 0.75f; -const char* HashTable::_s_llvm_class_name = "class.doris::HashTable"; HashTable::HashTable(const vector& build_expr_ctxs, const vector& probe_expr_ctxs, @@ -328,493 +318,4 @@ std::string HashTable::debug_string(bool skip_empty, const RowDescriptor* desc) return ss.str(); } -// Helper function to store a value into the results buffer if the expr -// evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so -// we'll pick a more random value. -static void codegen_assign_null_value( - LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder, - Value* dst, const TypeDescriptor& type) { - int64_t fvn_seed = HashUtil::FNV_SEED; - - if (type.type == TYPE_CHAR || type.type == TYPE_VARCHAR) { - Value* dst_ptr = builder->CreateStructGEP(dst, 0, "string_ptr"); - Value* dst_len = builder->CreateStructGEP(dst, 1, "string_len"); - Value* null_len = codegen->get_int_constant(TYPE_INT, fvn_seed); - Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); - builder->CreateStore(null_ptr, dst_ptr); - builder->CreateStore(null_len, dst_len); - return; - } else { - Value* null_value = NULL; - // Get a type specific representation of fvn_seed - switch (type.type) { - case TYPE_BOOLEAN: - // In results, booleans are stored as 1 byte - dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->get_int_constant(TYPE_TINYINT, fvn_seed); - break; - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - null_value = codegen->get_int_constant(type.type, fvn_seed); - break; - case TYPE_FLOAT: { - // Don't care about the value, just the bit pattern - float fvn_seed_float = *reinterpret_cast(&fvn_seed); - null_value = llvm::ConstantFP::get( - codegen->context(), llvm::APFloat(fvn_seed_float)); - break; - } - case TYPE_DOUBLE: { - // Don't care about the value, just the bit pattern - double fvn_seed_double = *reinterpret_cast(&fvn_seed); - null_value = llvm::ConstantFP::get( - codegen->context(), llvm::APFloat(fvn_seed_double)); - break; - } - default: - DCHECK(false); - } - builder->CreateStore(null_value, dst); - } -} - -// Codegen for evaluating a tuple row over either _build_expr_ctxs or _probe_expr_ctxs. -// For the case where we are joining on a single int, the IR looks like -// define i1 @EvaBuildRow(%"class.impala::HashTable"* %this_ptr, -// %"class.impala::TupleRow"* %row) { -// entry: -// %null_ptr = alloca i1 -// %0 = bitcast %"class.doris::TupleRow"* %row to i8** -// %eval = call i32 @SlotRef(i8** %0, i8* null, i1* %null_ptr) -// %1 = load i1* %null_ptr -// br i1 %1, label %null, label %not_null -// -// null: ; preds = %entry -// ret i1 true -// -// not_null: ; preds = %entry -// store i32 %eval, i32* inttoptr (i64 46146336 to i32*) -// br label %continue -// -// continue: ; preds = %not_null -// %2 = zext i1 %1 to i8 -// store i8 %2, i8* inttoptr (i64 46146248 to i8*) -// ret i1 false -// } -// For each expr, we create 3 code blocks. The null, not null and continue blocks. -// Both the null and not null branch into the continue block. The continue block -// becomes the start of the next block for codegen (either the next expr or just the -// end of the function). -Function* HashTable::codegen_eval_tuple_row(RuntimeState* state, bool build) { - // TODO: codegen_assign_null_value() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet - const std::vector& ctxs = build ? _build_expr_ctxs : _probe_expr_ctxs; - for (int i = 0; i < ctxs.size(); ++i) { - PrimitiveType type = ctxs[i]->root()->type().type; - if (type == TYPE_DATE || type == TYPE_DATETIME - || type == TYPE_DECIMAL || type == TYPE_CHAR || type == TYPE_DECIMALV2) { - return NULL; - } - } - - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(HashTable::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype( - codegen, build ? "eval_build_row" : "eval_probe_row", codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - - Value* row = args[1]; - Value* has_null = codegen->false_value(); - - // Aggregation with no grouping exprs also use the hash table interface for - // code simplicity. In that case, there are no build exprs. - if (!_build_expr_ctxs.empty()) { - const std::vector& ctxs = build ? _build_expr_ctxs : _probe_expr_ctxs; - for (int i = 0; i < ctxs.size(); ++i) { - // TODO: refactor this to somewhere else? This is not hash table specific - // except for the null handling bit and would be used for anyone that needs - // to materialize a vector of exprs - // Convert result buffer to llvm ptr type - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ctxs[i]->root()->type()), loc); - - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // Call expr - Function* expr_fn = NULL; - Status status = ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - std::stringstream ss; - ss << "Problem with codegen: " << status.get_error_msg(); - // TODO(zc ) - // state->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str())); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, ctxs[i]->root()->type(), - expr_fn, expr_fn_args, "result", NULL); - Value* is_null = result.get_is_null(); - - // Set null-byte result - Value* null_byte = builder.CreateZExt(is_null, codegen->get_type(TYPE_TINYINT)); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - builder.CreateStore(null_byte, llvm_null_byte_loc); - - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - if (!_stores_nulls) { - // hash table doesn't store nulls, no reason to keep evaluating exprs - builder.CreateRet(codegen->true_value()); - } else { - codegen_assign_null_value(codegen, &builder, llvm_loc, ctxs[i]->root()->type()); - has_null = codegen->true_value(); - builder.CreateBr(continue_block); - } - - // Not null block - builder.SetInsertPoint(not_null_block); - result.to_native_ptr(llvm_loc); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(continue_block); - } - } - builder.CreateRet(has_null); - - return codegen->finalize_function(fn); -} - -// Codegen for hashing the current row. In the case with both string and non-string data -// (group by int_col, string_col), the IR looks like: -// define i32 @hash_current_row(%"class.impala::HashTable"* %this_ptr) { -// entry: -// %0 = call i32 @IrCrcHash(i8* inttoptr (i64 51107808 to i8*), i32 16, i32 0) -// %1 = load i8* inttoptr (i64 29500112 to i8*) -// %2 = icmp ne i8 %1, 0 -// br i1 %2, label %null, label %not_null -// -// null: ; preds = %entry -// %3 = call i32 @IrCrcHash(i8* inttoptr (i64 51107824 to i8*), i32 16, i32 %0) -// br label %continue -// -// not_null: ; preds = %entry -// %4 = load i8** getelementptr inbounds ( -// %"struct.impala::StringValue"* inttoptr -// (i64 51107824 to %"struct.impala::StringValue"*), i32 0, i32 0) -// %5 = load i32* getelementptr inbounds ( -// %"struct.impala::StringValue"* inttoptr -// (i64 51107824 to %"struct.impala::StringValue"*), i32 0, i32 1) -// %6 = call i32 @IrCrcHash(i8* %4, i32 %5, i32 %0) -// br label %continue -// -// continue: ; preds = %not_null, %null -// %7 = phi i32 [ %6, %not_null ], [ %3, %null ] -// ret i32 %7 -// } -// TODO: can this be cross-compiled? -Function* HashTable::codegen_hash_current_row(RuntimeState* state) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) { - return NULL; - } - } - - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get types to generate function prototype - Type* this_type = codegen->get_type(HashTable::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, "hash_current_row", codegen->get_type(TYPE_INT)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* this_arg = NULL; - Function* fn = prototype.generate_prototype(&builder, &this_arg); - - Value* hash_result = codegen->get_int_constant(TYPE_INT, _initial_seed); - Value* data = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), _expr_values_buffer); - if (_var_result_begin == -1) { - // No variable length slots, just hash what is in '_expr_values_buffer' - if (_results_buffer_size > 0) { - Function* hash_fn = codegen->get_hash_function(_results_buffer_size); - Value* len = codegen->get_int_constant(TYPE_INT, _results_buffer_size); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result); - } - } else { - if (_var_result_begin > 0) { - Function* hash_fn = codegen->get_hash_function(_var_result_begin); - Value* len = codegen->get_int_constant(TYPE_INT, _var_result_begin); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result); - } - - // Hash string slots - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - if (_build_expr_ctxs[i]->root()->type().type != TYPE_CHAR - && _build_expr_ctxs[i]->root()->type().type != TYPE_VARCHAR) { - continue; - } - - BasicBlock* null_block = NULL; - BasicBlock* not_null_block = NULL; - BasicBlock* continue_block = NULL; - Value* str_null_result = NULL; - - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - - // If the hash table stores nulls, we need to check if the stringval - // evaluated to NULL - if (_stores_nulls) { - null_block = BasicBlock::Create(context, "null", fn); - not_null_block = BasicBlock::Create(context, "not_null", fn); - continue_block = BasicBlock::Create(context, "continue", fn); - - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - Value* is_null = builder.CreateICmpNE( - null_byte, codegen->get_int_constant(TYPE_TINYINT, 0)); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // For null, we just want to call the hash function on the portion of - // the data - builder.SetInsertPoint(null_block); - Function* null_hash_fn = codegen->get_hash_function(sizeof(StringValue)); - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), loc); - Value* len = codegen->get_int_constant(TYPE_INT, sizeof(StringValue)); - str_null_result = builder.CreateCall3(null_hash_fn, llvm_loc, len, hash_result); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(not_null_block); - } - - // Convert _expr_values_buffer loc to llvm value - Value* str_val = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(TYPE_VARCHAR), loc); - - Value* ptr = builder.CreateStructGEP(str_val, 0, "ptr"); - Value* len = builder.CreateStructGEP(str_val, 1, "len"); - ptr = builder.CreateLoad(ptr); - len = builder.CreateLoad(len); - - // Call hash(ptr, len, hash_result); - Function* general_hash_fn = codegen->get_hash_function(); - Value* string_hash_result = - builder.CreateCall3(general_hash_fn, ptr, len, hash_result); - - if (_stores_nulls) { - builder.CreateBr(continue_block); - builder.SetInsertPoint(continue_block); - // Use phi node to reconcile that we could have come from the string-null - // path and string not null paths. - PHINode* phi_node = builder.CreatePHI(codegen->get_type(TYPE_INT), 2); - phi_node->addIncoming(string_hash_result, not_null_block); - phi_node->addIncoming(str_null_result, null_block); - hash_result = phi_node; - } else { - hash_result = string_hash_result; - } - } - } - - builder.CreateRet(hash_result); - return codegen->finalize_function(fn); -} - -// Codegen for HashTable::Equals. For a hash table with two exprs (string,int), the -// IR looks like: -// -// define i1 @Equals(%"class.impala::OldHashTable"* %this_ptr, -// %"class.impala::TupleRow"* %row) { -// entry: -// %result = call i64 @get_slot_ref(%"class.impala::ExprContext"* inttoptr -// (i64 146381856 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %0 = trunc i64 %result to i1 -// br i1 %0, label %null, label %not_null -// -// false_block: ; preds = %not_null2, %null1, %not_null, %null -// ret i1 false -// -// null: ; preds = %entry -// br i1 false, label %continue, label %false_block -// -// not_null: ; preds = %entry -// %1 = load i32* inttoptr (i64 104774368 to i32*) -// %2 = ashr i64 %result, 32 -// %3 = trunc i64 %2 to i32 -// %cmp_raw = icmp eq i32 %3, %1 -// br i1 %cmp_raw, label %continue, label %false_block -// -// continue: ; preds = %not_null, %null -// %result4 = call { i64, i8* } @get_slot_ref( -// %"class.impala::ExprContext"* inttoptr -// (i64 146381696 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %4 = extractvalue { i64, i8* } %result4, 0 -// %5 = trunc i64 %4 to i1 -// br i1 %5, label %null1, label %not_null2 -// -// null1: ; preds = %continue -// br i1 false, label %continue3, label %false_block -// -// not_null2: ; preds = %continue -// %6 = extractvalue { i64, i8* } %result4, 0 -// %7 = ashr i64 %6, 32 -// %8 = trunc i64 %7 to i32 -// %result5 = extractvalue { i64, i8* } %result4, 1 -// %cmp_raw6 = call i1 @_Z11StringValEQPciPKN6impala11StringValueE( -// i8* %result5, i32 %8, %"struct.impala::StringValue"* inttoptr -// (i64 104774384 to %"struct.impala::StringValue"*)) -// br i1 %cmp_raw6, label %continue3, label %false_block -// -// continue3: ; preds = %not_null2, %null1 -// ret i1 true -// } -Function* HashTable::codegen_equals(RuntimeState* state) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) { - return NULL; - } - } - - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(HashTable::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, "equals", codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* row = args[1]; - - if (!_build_expr_ctxs.empty()) { - BasicBlock* false_block = BasicBlock::Create(context, "false_block", fn); - - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // call GetValue on build_exprs[i] - Function* expr_fn = NULL; - Status status = _build_expr_ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - std::stringstream ss; - ss << "Problem with codegen: " << status.get_error_msg(); - // TODO(zc) - // state->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str())); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), _build_expr_ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, _build_expr_ctxs[i]->root()->type(), - expr_fn, expr_fn_args, "result", NULL); - Value* is_null = result.get_is_null(); - - // Determine if probe is null (i.e. _expr_value_null_bits[i] == true). In - // the case where the hash table does not store nulls, this is always false. - Value* probe_is_null = codegen->false_value(); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - if (_stores_nulls) { - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - probe_is_null = builder.CreateICmpNE( - null_byte, codegen->get_int_constant(TYPE_TINYINT, 0)); - } - - // Get llvm value for probe_val from '_expr_values_buffer' - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* probe_val = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(_build_expr_ctxs[i]->root()->type()), loc); - - // Branch for GetValue() returning NULL - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - builder.CreateCondBr(probe_is_null, continue_block, false_block); - - // Not-null block - builder.SetInsertPoint(not_null_block); - if (_stores_nulls) { - BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", fn); - // First need to compare that probe expr[i] is not null - builder.CreateCondBr(probe_is_null, false_block, cmp_block); - builder.SetInsertPoint(cmp_block); - } - // Check result == probe_val - Value* is_equal = result.eq_to_native_ptr(probe_val); - builder.CreateCondBr(is_equal, continue_block, false_block); - - builder.SetInsertPoint(continue_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - } else { - builder.CreateRet(codegen->true_value()); - } - - return codegen->finalize_function(fn); -} - } diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h index 0b3168871d..3d4b502975 100644 --- a/be/src/exec/hash_table.h +++ b/be/src/exec/hash_table.h @@ -25,17 +25,10 @@ #include "common/logging.h" #include "util/hash_util.hpp" -namespace llvm { - -class Function; - -} - namespace doris { class Expr; class ExprContext; -class LlvmCodeGen; class RowDescriptor; class Tuple; class TupleRow; @@ -179,21 +172,6 @@ public: return Iterator(); } - /// Codegen for evaluating a tuple row. Codegen'd function matches the signature - /// for EvalBuildRow and EvalTupleRow. - /// if build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs - llvm::Function* codegen_eval_tuple_row(RuntimeState* state, bool build_row); - - /// Codegen for hashing the expr values in '_expr_values_buffer'. Function - /// prototype matches hash_current_row identically. - llvm::Function* codegen_hash_current_row(RuntimeState* state); - - /// Codegen for evaluating a TupleRow and comparing equality against - /// '_expr_values_buffer'. Function signature matches HashTable::Equals() - llvm::Function* codegen_equals(RuntimeState* state); - - static const char* _s_llvm_class_name; - // Dump out the entire hash table to string. If skip_empty, empty buckets are // skipped. If build_desc is non-null, the build rows will be output. Otherwise // just the build row addresses. diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h index 82a7214211..758c9fa3fb 100644 --- a/be/src/exec/merge_join_node.h +++ b/be/src/exec/merge_join_node.h @@ -46,8 +46,6 @@ public: virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); virtual Status close(RuntimeState* state); - static const char* LLVM_CLASS_NAME; - protected: void debug_string(int indentation_level, std::stringstream* out) const; diff --git a/be/src/exec/new_partitioned_aggregation_node.cc b/be/src/exec/new_partitioned_aggregation_node.cc index 9ea0826135..689567cecf 100644 --- a/be/src/exec/new_partitioned_aggregation_node.cc +++ b/be/src/exec/new_partitioned_aggregation_node.cc @@ -23,7 +23,6 @@ #include //#include "codegen/codegen_anyval.h" -//#include "codegen/llvm_codegen.h" #include "exec/new_partitioned_hash_table.h" #include "exec/new_partitioned_hash_table.inline.h" #include "exprs/new_agg_fn_evaluator.h" @@ -50,14 +49,10 @@ #include "common/names.h" -// using namespace llvm; using namespace strings; namespace doris { -const char* NewPartitionedAggregationNode::LLVM_CLASS_NAME = - "class.doris::NewPartitionedAggregationNode"; - /// The minimum reduction factor (input rows divided by output rows) to grow hash tables /// in a streaming preaggregation, given that the hash tables are currently the given /// size or above. The sizes roughly correspond to hash table sizes where the bucket @@ -237,20 +232,6 @@ Status NewPartitionedAggregationNode::prepare(RuntimeState* state) { return Status::OK(); } -//void NewPartitionedAggregationNode::Codegen(RuntimeState* state) { -// DCHECK(state->ShouldCodegen()); -// ExecNode::Codegen(state); -// if (IsNodeCodegenDisabled()) return; -// -// LlvmCodeGen* codegen = state->codegen(); -// DCHECK(codegen != NULL); -// TPrefetchMode::type prefetch_mode = state_->query_options().prefetch_mode; -// Status codegen_status = is_streaming_preagg_ ? -// CodegenProcessBatchStreaming(codegen, prefetch_mode) : -// CodegenProcessBatch(codegen, prefetch_mode); -// runtime_profile()->AddCodegenMsg(codegen_status.ok(), codegen_status); -//} - Status NewPartitionedAggregationNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); // Open the child before consuming resources in this node. @@ -1470,605 +1451,6 @@ void NewPartitionedAggregationNode::ClosePartitions() { // return ExecNode::QueryMaintenance(state); //} -#if 0 - -// IR Generation for updating a single aggregation slot. Signature is: -// void UpdateSlot(FunctionContext* agg_fn_ctx, ExprContext* agg_expr_ctx, -// AggTuple* agg_tuple, char** row) -// -// The IR for sum(double_col), which is constructed directly with the IRBuilder, is: -// -// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"** %agg_expr_ctxs, -// { i8, [7 x i8], double }* %agg_tuple, %"class.impala::TupleRow"* %row) #34 { -// entry: -// %expr_ctx_ptr = getelementptr %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %agg_expr_ctxs, i32 0 -// %expr_ctx = load %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %expr_ctx_ptr -// %input0 = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx, -// %"class.impala::TupleRow"* %row) -// %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], double }, -// { i8, [7 x i8], double }* %agg_tuple, i32 0, i32 2 -// %dst_val = load double, double* %dst_slot_ptr -// %0 = extractvalue { i8, double } %input0, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %not_null -// -// ret: ; preds = %not_null, %entry -// ret void -// -// not_null: ; preds = %entry -// %val = extractvalue { i8, double } %input0, 1 -// %1 = fadd double %dst_val, %val -// %2 = bitcast { i8, [7 x i8], double }* %agg_tuple to i8* -// %null_byte_ptr = getelementptr i8, i8* %2, i32 0 -// %null_byte = load i8, i8* %null_byte_ptr -// %null_bit_cleared = and i8 %null_byte, -2 -// store i8 %null_bit_cleared, i8* %null_byte_ptr -// store double %1, double* %dst_slot_ptr -// br label %ret -// } -// -// The IR for min(timestamp_col), which uses the UDA interface, is: -// -// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"** %agg_expr_ctxs, -// { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple, -// %"class.impala::TupleRow"* %row) #34 { -// entry: -// %dst_lowered_ptr = alloca { i64, i64 } -// %input_lowered_ptr = alloca { i64, i64 } -// %expr_ctx_ptr = getelementptr %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %agg_expr_ctxs, i32 0 -// %expr_ctx = load %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %expr_ctx_ptr -// %input0 = call { i64, i64 } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx, -// %"class.impala::TupleRow"* %row) -// %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], -// %"class.impala::TimestampValue" }, { i8, [7 x i8], -// %"class.impala::TimestampValue" }* %agg_tuple, i32 0, i32 2 -// %dst_val = load %"class.impala::TimestampValue", -// %"class.impala::TimestampValue"* %dst_slot_ptr -// %0 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* -// %null_byte_ptr = getelementptr i8, i8* %0, i32 0 -// %null_byte = load i8, i8* %null_byte_ptr -// %null_mask = and i8 %null_byte, 1 -// %is_null = icmp ne i8 %null_mask, 0 -// %is_null_ext = zext i1 %is_null to i64 -// %1 = or i64 0, %is_null_ext -// %dst = insertvalue { i64, i64 } zeroinitializer, i64 %1, 0 -// %time_of_day = extractvalue %"class.impala::TimestampValue" %dst_val, 0, 0, 0, 0 -// %dst1 = insertvalue { i64, i64 } %dst, i64 %time_of_day, 1 -// %date = extractvalue %"class.impala::TimestampValue" %dst_val, 1, 0, 0 -// %2 = extractvalue { i64, i64 } %dst1, 0 -// %3 = zext i32 %date to i64 -// %4 = shl i64 %3, 32 -// %5 = and i64 %2, 4294967295 -// %6 = or i64 %5, %4 -// %dst2 = insertvalue { i64, i64 } %dst1, i64 %6, 0 -// store { i64, i64 } %input0, { i64, i64 }* %input_lowered_ptr -// %input_unlowered_ptr = bitcast { i64, i64 }* %input_lowered_ptr -// to %"struct.impala_udf::TimestampVal"* -// store { i64, i64 } %dst2, { i64, i64 }* %dst_lowered_ptr -// %dst_unlowered_ptr = bitcast { i64, i64 }* %dst_lowered_ptr -// to %"struct.impala_udf::TimestampVal"* -// call void -// @_ZN6impala18AggregateFunctions3MinIN10impala_udf12TimestampValEEEvPNS2_15FunctionContextERKT_PS6_.2( -// %"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"struct.impala_udf::TimestampVal"* %input_unlowered_ptr, -// %"struct.impala_udf::TimestampVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i64 }, { i64, i64 }* %dst_lowered_ptr -// %7 = extractvalue { i64, i64 } %anyval_result, 1 -// %8 = insertvalue %"class.impala::TimestampValue" zeroinitializer, i64 %7, 0, 0, 0, 0 -// %9 = extractvalue { i64, i64 } %anyval_result, 0 -// %10 = ashr i64 %9, 32 -// %11 = trunc i64 %10 to i32 -// %12 = insertvalue %"class.impala::TimestampValue" %8, i32 %11, 1, 0, 0 -// %13 = extractvalue { i64, i64 } %anyval_result, 0 -// %result_is_null = trunc i64 %13 to i1 -// %14 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* -// %null_byte_ptr3 = getelementptr i8, i8* %14, i32 0 -// %null_byte4 = load i8, i8* %null_byte_ptr3 -// %null_bit_cleared = and i8 %null_byte4, -2 -// %15 = sext i1 %result_is_null to i8 -// %null_bit = and i8 %15, 1 -// %null_bit_set = or i8 %null_bit_cleared, %null_bit -// store i8 %null_bit_set, i8* %null_byte_ptr3 -// store %"class.impala::TimestampValue" %12, -// %"class.impala::TimestampValue"* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %entry -// ret void -// } -// -//Status NewPartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen, -// NewAggFnEvaluator* evaluator, int evaluator_idx, SlotDescriptor* slot_desc, -// Function** fn) { -// PointerType* fn_ctx_type = -// codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME); -// PointerType* expr_ctxs_type = -// codegen->GetPtrPtrType(codegen->GetType(ExprContext::LLVM_CLASS_NAME)); -// StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen); -// if (tuple_struct == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateSlot(): failed to generate " -// "intermediate tuple desc"); -// } -// PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_struct); -// PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME); -// -// // Create UpdateSlot prototype -// LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type()); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctx", fn_ctx_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_expr_ctxs", expr_ctxs_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); -// -// LlvmBuilder builder(codegen->context()); -// Value* args[4]; -// *fn = prototype.GeneratePrototype(&builder, &args[0]); -// Value* agg_fn_ctx_arg = args[0]; -// Value* agg_expr_ctxs_arg = args[1]; -// Value* agg_tuple_arg = args[2]; -// Value* row_arg = args[3]; -// -// DCHECK_GE(evaluator->input_expr_ctxs().size(), 1); -// vector input_vals; -// for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { -// ExprContext* agg_expr_ctx = evaluator->input_expr_ctxs()[i]; -// Expr* agg_expr = agg_expr_ctx->root(); -// Function* agg_expr_fn; -// RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(codegen, &agg_expr_fn)); -// DCHECK(agg_expr_fn != NULL); -// -// // Call expr function with the matching expr context to get src slot value. -// Value* expr_ctx_ptr = builder.CreateInBoundsGEP( -// agg_expr_ctxs_arg, codegen->GetIntConstant(TYPE_INT, i), "expr_ctx_ptr"); -// Value* expr_ctx = builder.CreateLoad(expr_ctx_ptr, "expr_ctx"); -// string input_name = Substitute("input$0", i); -// input_vals.push_back( -// CodegenAnyVal::CreateCallWrapped(codegen, &builder, agg_expr->type(), agg_expr_fn, -// ArrayRef({expr_ctx, row_arg}), input_name.c_str())); -// } -// -// NewAggFnEvaluator::AggregationOp agg_op = evaluator->agg_op(); -// const ColumnType& dst_type = evaluator->intermediate_type(); -// bool dst_is_int_or_float_or_bool = dst_type.IsIntegerType() -// || dst_type.IsFloatingPointType() || dst_type.IsBooleanType(); -// bool dst_is_numeric_or_bool = dst_is_int_or_float_or_bool || dst_type.IsDecimalType(); -// -// BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", *fn); -// -// // Emit the code to compute 'result' and set the NULL indicator if needed. First check -// // for special cases where we can emit a very simple instruction sequence, then fall -// // back to the general-purpose approach of calling the cross-compiled builtin UDA. -// CodegenAnyVal& src = input_vals[0]; -// // 'dst_slot_ptr' points to the slot in the aggregate tuple to update. -// Value* dst_slot_ptr = builder.CreateStructGEP( -// NULL, agg_tuple_arg, slot_desc->llvm_field_idx(), "dst_slot_ptr"); -// Value* result = NULL; -// Value* dst_value = builder.CreateLoad(dst_slot_ptr, "dst_val"); -// if (agg_op == NewAggFnEvaluator::COUNT) { -// src.CodegenBranchIfNull(&builder, ret_block); -// if (evaluator->is_merge()) { -// result = builder.CreateAdd(dst_value, src.GetVal(), "count_sum"); -// } else { -// result = builder.CreateAdd( -// dst_value, codegen->GetIntConstant(TYPE_BIGINT, 1), "count_inc"); -// } -// DCHECK(!slot_desc->is_nullable()); -// } else if ((agg_op == NewAggFnEvaluator::MIN || agg_op == NewAggFnEvaluator::MAX) -// && dst_is_numeric_or_bool) { -// bool is_min = agg_op == NewAggFnEvaluator::MIN; -// src.CodegenBranchIfNull(&builder, ret_block); -// Function* min_max_fn = codegen->CodegenMinMax(slot_desc->type(), is_min); -// Value* min_max_args[] = {dst_value, src.GetVal()}; -// result = -// builder.CreateCall(min_max_fn, min_max_args, is_min ? "min_value" : "max_value"); -// // Dst may have been NULL, make sure to unset the NULL bit. -// DCHECK(slot_desc->is_nullable()); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, codegen->false_value()); -// } else if (agg_op == NewAggFnEvaluator::SUM && dst_is_int_or_float_or_bool) { -// src.CodegenBranchIfNull(&builder, ret_block); -// if (dst_type.IsFloatingPointType()) { -// result = builder.CreateFAdd(dst_value, src.GetVal()); -// } else { -// result = builder.CreateAdd(dst_value, src.GetVal()); -// } -// // Dst may have been NULL, make sure to unset the NULL bit. -// DCHECK(slot_desc->is_nullable()); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, codegen->false_value()); -// } else { -// // The remaining cases are implemented using the UDA interface. -// // Create intermediate argument 'dst' from 'dst_value' -// CodegenAnyVal dst = CodegenAnyVal::GetNonNullVal(codegen, &builder, dst_type, "dst"); -// -// // For a subset of builtins we generate a different code sequence that exploits two -// // properties of the builtins. First, NULL input values can be skipped. Second, the -// // value of the slot was initialized in the right way in InitAggSlots() (e.g. 0 for -// // SUM) that we get the right result if UpdateSlot() pretends that the NULL bit of -// // 'dst' is unset. Empirically this optimisation makes TPC-H Q1 5-10% faster. -// bool special_null_handling = !evaluator->intermediate_type().IsStringType() -// && !evaluator->intermediate_type().IsTimestampType() -// && (agg_op == NewAggFnEvaluator::MIN || agg_op == NewAggFnEvaluator::MAX -// || agg_op == NewAggFnEvaluator::SUM || agg_op == NewAggFnEvaluator::AVG -// || agg_op == NewAggFnEvaluator::NDV); -// if (slot_desc->is_nullable()) { -// if (special_null_handling) { -// src.CodegenBranchIfNull(&builder, ret_block); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, codegen->false_value()); -// } else { -// dst.SetIsNull(slot_desc->CodegenIsNull(codegen, &builder, agg_tuple_arg)); -// } -// } -// dst.SetFromRawValue(dst_value); -// -// // Call the UDA to update/merge 'src' into 'dst', with the result stored in -// // 'updated_dst_val'. -// CodegenAnyVal updated_dst_val; -// RETURN_IF_ERROR(CodegenCallUda(codegen, &builder, evaluator, agg_fn_ctx_arg, -// input_vals, dst, &updated_dst_val)); -// result = updated_dst_val.ToNativeValue(); -// -// if (slot_desc->is_nullable() && !special_null_handling) { -// // Set NULL bit in the slot based on the return value. -// Value* result_is_null = updated_dst_val.GetIsNull("result_is_null"); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, result_is_null); -// } -// } -// -// // TODO: Store to register in the loop and store once to memory at the end of the loop. -// builder.CreateStore(result, dst_slot_ptr); -// builder.CreateBr(ret_block); -// -// builder.SetInsertPoint(ret_block); -// builder.CreateRetVoid(); -// -// // Avoid producing huge UpdateTuple() function after inlining - LLVM's optimiser -// // memory/CPU usage scales super-linearly with function size. -// // E.g. compute stats on all columns of a 1000-column table previously took 4 minutes to -// // codegen because all the UpdateSlot() functions were inlined. -// if (evaluator_idx >= LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) { -// codegen->SetNoInline(*fn); -// } -// -// *fn = codegen->FinalizeFunction(*fn); -// if (*fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateSlot(): codegen'd " -// "UpdateSlot() function failed verification, see log"); -// } -// return Status::OK(); -//} -// -//Status NewPartitionedAggregationNode::CodegenCallUda(LlvmCodeGen* codegen, -// LlvmBuilder* builder, NewAggFnEvaluator* evaluator, Value* agg_fn_ctx_arg, -// const vector& input_vals, const CodegenAnyVal& dst, -// CodegenAnyVal* updated_dst_val) { -// DCHECK_EQ(evaluator->input_expr_ctxs().size(), input_vals.size()); -// Function* uda_fn; -// RETURN_IF_ERROR(evaluator->GetUpdateOrMergeFunction(codegen, &uda_fn)); -// -// // Set up arguments for call to UDA, which are the FunctionContext*, followed by -// // pointers to all input values, followed by a pointer to the destination value. -// vector uda_fn_args; -// uda_fn_args.push_back(agg_fn_ctx_arg); -// -// // Create pointers to input args to pass to uda_fn. We must use the unlowered type, -// // e.g. IntVal, because the UDA interface expects the values to be passed as const -// // references to the classes. -// for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { -// uda_fn_args.push_back(input_vals[i].GetUnloweredPtr("input_unlowered_ptr")); -// } -// -// // Create pointer to dst to pass to uda_fn. We must use the unlowered type for the -// // same reason as above. -// Value* dst_lowered_ptr = dst.GetLoweredPtr("dst_lowered_ptr"); -// const ColumnType& dst_type = evaluator->intermediate_type(); -// Type* dst_unlowered_ptr_type = CodegenAnyVal::GetUnloweredPtrType(codegen, dst_type); -// Value* dst_unlowered_ptr = builder->CreateBitCast( -// dst_lowered_ptr, dst_unlowered_ptr_type, "dst_unlowered_ptr"); -// uda_fn_args.push_back(dst_unlowered_ptr); -// -// // Call 'uda_fn' -// builder->CreateCall(uda_fn, uda_fn_args); -// -// // Convert intermediate 'dst_arg' back to the native type. -// Value* anyval_result = builder->CreateLoad(dst_lowered_ptr, "anyval_result"); -// -// *updated_dst_val = CodegenAnyVal(codegen, builder, dst_type, anyval_result); -// return Status::OK(); -//} - -// IR codegen for the UpdateTuple loop. This loop is query specific and based on the -// aggregate functions. The function signature must match the non- codegen'd UpdateTuple -// exactly. -// For the query: -// select count(*), count(int_col), sum(double_col) the IR looks like: -// -// ; Function Attrs: alwaysinline -// define void @UpdateTuple(%"class.impala::NewPartitionedAggregationNode"* %this_ptr, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, %"class.impala::Tuple"* -// %tuple, -// %"class.impala::TupleRow"* %row, i1 %is_merge) #34 { -// entry: -// %tuple1 = -// bitcast %"class.impala::Tuple"* %tuple to { i8, [7 x i8], i64, i64, double }* -// %src_slot = getelementptr inbounds { i8, [7 x i8], i64, i64, double }, -// { i8, [7 x i8], i64, i64, double }* %tuple1, i32 0, i32 2 -// %count_star_val = load i64, i64* %src_slot -// %count_star_inc = add i64 %count_star_val, 1 -// store i64 %count_star_inc, i64* %src_slot -// %0 = getelementptr %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 1 -// %agg_fn_ctx = load %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %0 -// %1 = call %"class.impala::ExprContext"** -// @_ZNK6impala26NewPartitionedAggregationNode18GetAggExprContextsEi( -// %"class.impala::NewPartitionedAggregationNode"* %this_ptr, i32 1) -// call void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"** %1, { i8, [7 x i8], i64, i64, double }* %tuple1, -// %"class.impala::TupleRow"* %row) -// %2 = getelementptr %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 2 -// %agg_fn_ctx2 = load %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %2 -// %3 = call %"class.impala::ExprContext"** -// @_ZNK6impala26NewPartitionedAggregationNode18GetAggExprContextsEi( -// %"class.impala::NewPartitionedAggregationNode"* %this_ptr, i32 2) -// call void @UpdateSlot.4(%"class.impala_udf::FunctionContext"* %agg_fn_ctx2, -// %"class.impala::ExprContext"** %3, { i8, [7 x i8], i64, i64, double }* %tuple1, -// %"class.impala::TupleRow"* %row) -// ret void -// } -//Status NewPartitionedAggregationNode::CodegenUpdateTuple( -// LlvmCodeGen* codegen, Function** fn) { -// SCOPED_TIMER(codegen->codegen_timer()); -// -// for (const SlotDescriptor* slot_desc : intermediate_tuple_desc_->slots()) { -// if (slot_desc->type().type == TYPE_CHAR) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateTuple(): cannot codegen" -// "CHAR in aggregations"); -// } -// } -// -// if (intermediate_tuple_desc_->GetLlvmStruct(codegen) == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateTuple(): failed to generate " -// "intermediate tuple desc"); -// } -// -// // Get the types to match the UpdateTuple signature -// Type* agg_node_type = codegen->GetType(NewPartitionedAggregationNode::LLVM_CLASS_NAME); -// Type* fn_ctx_type = codegen->GetType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME); -// Type* tuple_type = codegen->GetType(Tuple::LLVM_CLASS_NAME); -// Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME); -// -// PointerType* agg_node_ptr_type = codegen->GetPtrType(agg_node_type); -// PointerType* fn_ctx_ptr_ptr_type = codegen->GetPtrPtrType(fn_ctx_type); -// PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_type); -// PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type); -// -// StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen); -// PointerType* tuple_ptr = codegen->GetPtrType(tuple_struct); -// LlvmCodeGen::FnPrototype prototype(codegen, "UpdateTuple", codegen->void_type()); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctxs", fn_ctx_ptr_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("is_merge", codegen->boolean_type())); -// -// LlvmBuilder builder(codegen->context()); -// Value* args[5]; -// *fn = prototype.GeneratePrototype(&builder, &args[0]); -// Value* this_arg = args[0]; -// Value* agg_fn_ctxs_arg = args[1]; -// Value* tuple_arg = args[2]; -// Value* row_arg = args[3]; -// -// // Cast the parameter types to the internal llvm runtime types. -// // TODO: get rid of this by using right type in function signature -// tuple_arg = builder.CreateBitCast(tuple_arg, tuple_ptr, "tuple"); -// -// Function* get_expr_ctxs_fn = -// codegen->GetFunction(IRFunction::PART_AGG_NODE_GET_EXPR_CTXS, false); -// DCHECK(get_expr_ctxs_fn != NULL); -// -// // Loop over each expr and generate the IR for that slot. If the expr is not -// // count(*), generate a helper IR function to update the slot and call that. -// int j = grouping_expr_ctxs_.size(); -// for (int i = 0; i < aggregate_evaluators_.size(); ++i, ++j) { -// SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[j]; -// NewAggFnEvaluator* evaluator = aggregate_evaluators_[i]; -// if (evaluator->is_count_star()) { -// // TODO: we should be able to hoist this up to the loop over the batch and just -// // increment the slot by the number of rows in the batch. -// int field_idx = slot_desc->llvm_field_idx(); -// Value* const_one = codegen->GetIntConstant(TYPE_BIGINT, 1); -// Value* slot_ptr = builder.CreateStructGEP(NULL, tuple_arg, field_idx, "src_slot"); -// Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val"); -// Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc"); -// builder.CreateStore(count_inc, slot_ptr); -// } else { -// Function* update_slot_fn; -// RETURN_IF_ERROR( -// CodegenUpdateSlot(codegen, evaluator, i, slot_desc, &update_slot_fn)); -// Value* agg_fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i); -// Value* agg_fn_ctx = builder.CreateLoad(agg_fn_ctx_ptr, "agg_fn_ctx"); -// // Call GetExprCtx() to get the expression context. -// DCHECK(agg_expr_ctxs_[i] != NULL); -// Value* get_expr_ctxs_args[] = {this_arg, codegen->GetIntConstant(TYPE_INT, i)}; -// Value* agg_expr_ctxs = builder.CreateCall(get_expr_ctxs_fn, get_expr_ctxs_args); -// Value* update_slot_args[] = {agg_fn_ctx, agg_expr_ctxs, tuple_arg, row_arg}; -// builder.CreateCall(update_slot_fn, update_slot_args); -// } -// } -// builder.CreateRetVoid(); -// -// // Avoid inlining big UpdateTuple function into outer loop - we're unlikely to get -// // any benefit from it since the function call overhead will be amortized. -// if (aggregate_evaluators_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { -// codegen->SetNoInline(*fn); -// } -// -// // CodegenProcessBatch() does the final optimizations. -// *fn = codegen->FinalizeFunction(*fn); -// if (*fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateTuple(): codegen'd " -// "UpdateTuple() function failed verification, see log"); -// } -// return Status::OK(); -//} -// -//Status NewPartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen, -// TPrefetchMode::type prefetch_mode) { -// SCOPED_TIMER(codegen->codegen_timer()); -// -// Function* update_tuple_fn; -// RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn)); -// -// // Get the cross compiled update row batch function -// IRFunction::Type ir_fn = (!grouping_expr_ctxs_.empty() ? -// IRFunction::PART_AGG_NODE_PROCESS_BATCH_UNAGGREGATED : -// IRFunction::PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING); -// Function* process_batch_fn = codegen->GetFunction(ir_fn, true); -// DCHECK(process_batch_fn != NULL); -// -// int replaced; -// if (!grouping_expr_ctxs_.empty()) { -// // Codegen for grouping using hash table -// -// // Replace prefetch_mode with constant so branches can be optimised out. -// Value* prefetch_mode_arg = codegen->GetArgument(process_batch_fn, 3); -// prefetch_mode_arg->replaceAllUsesWith( -// ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode)); -// -// // The codegen'd ProcessBatch function is only used in Open() with level_ = 0, -// // so don't use murmur hash -// Function* hash_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, /* use murmur */ false, &hash_fn)); -// -// // Codegen HashTable::Equals -// Function* build_equals_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &build_equals_fn)); -// -// // Codegen for evaluating input rows -// Function* eval_grouping_expr_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_grouping_expr_fn)); -// -// // Replace call sites -// replaced = codegen->ReplaceCallSites(process_batch_fn, eval_grouping_expr_fn, -// "EvalProbeRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_fn, hash_fn, "HashRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_fn, build_equals_fn, "Equals"); -// DCHECK_EQ(replaced, 1); -// -// NewPartitionedHashTableCtx::HashTableReplacedConstants replaced_constants; -// const bool stores_duplicates = false; -// RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates, 1, -// process_batch_fn, &replaced_constants)); -// DCHECK_GE(replaced_constants.stores_nulls, 1); -// DCHECK_GE(replaced_constants.finds_some_nulls, 1); -// DCHECK_GE(replaced_constants.stores_duplicates, 1); -// DCHECK_GE(replaced_constants.stores_tuples, 1); -// DCHECK_GE(replaced_constants.quadratic_probing, 1); -// } -// -// replaced = codegen->ReplaceCallSites(process_batch_fn, update_tuple_fn, "UpdateTuple"); -// DCHECK_GE(replaced, 1); -// process_batch_fn = codegen->FinalizeFunction(process_batch_fn); -// if (process_batch_fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenProcessBatch(): codegen'd " -// "ProcessBatch() function failed verification, see log"); -// } -// -// void **codegened_fn_ptr = grouping_expr_ctxs_.empty() ? -// reinterpret_cast(&process_batch_no_grouping_fn_) : -// reinterpret_cast(&process_batch_fn_); -// codegen->AddFunctionToJit(process_batch_fn, codegened_fn_ptr); -// return Status::OK(); -//} -// -//Status NewPartitionedAggregationNode::CodegenProcessBatchStreaming( -// LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode) { -// DCHECK(is_streaming_preagg_); -// SCOPED_TIMER(codegen->codegen_timer()); -// -// IRFunction::Type ir_fn = IRFunction::PART_AGG_NODE_PROCESS_BATCH_STREAMING; -// Function* process_batch_streaming_fn = codegen->GetFunction(ir_fn, true); -// DCHECK(process_batch_streaming_fn != NULL); -// -// // Make needs_serialize arg constant so dead code can be optimised out. -// Value* needs_serialize_arg = codegen->GetArgument(process_batch_streaming_fn, 2); -// needs_serialize_arg->replaceAllUsesWith( -// ConstantInt::get(Type::getInt1Ty(codegen->context()), needs_serialize_)); -// -// // Replace prefetch_mode with constant so branches can be optimised out. -// Value* prefetch_mode_arg = codegen->GetArgument(process_batch_streaming_fn, 3); -// prefetch_mode_arg->replaceAllUsesWith( -// ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode)); -// -// Function* update_tuple_fn; -// RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn)); -// -// // We only use the top-level hash function for streaming aggregations. -// Function* hash_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, false, &hash_fn)); -// -// // Codegen HashTable::Equals -// Function* equals_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &equals_fn)); -// -// // Codegen for evaluating input rows -// Function* eval_grouping_expr_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_grouping_expr_fn)); -// -// // Replace call sites -// int replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, update_tuple_fn, -// "UpdateTuple"); -// DCHECK_EQ(replaced, 2); -// -// replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, eval_grouping_expr_fn, -// "EvalProbeRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, hash_fn, "HashRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, equals_fn, "Equals"); -// DCHECK_EQ(replaced, 1); -// -// NewPartitionedHashTableCtx::HashTableReplacedConstants replaced_constants; -// const bool stores_duplicates = false; -// RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates, 1, -// process_batch_streaming_fn, &replaced_constants)); -// DCHECK_GE(replaced_constants.stores_nulls, 1); -// DCHECK_GE(replaced_constants.finds_some_nulls, 1); -// DCHECK_GE(replaced_constants.stores_duplicates, 1); -// DCHECK_GE(replaced_constants.stores_tuples, 1); -// DCHECK_GE(replaced_constants.quadratic_probing, 1); -// -// DCHECK(process_batch_streaming_fn != NULL); -// process_batch_streaming_fn = codegen->FinalizeFunction(process_batch_streaming_fn); -// if (process_batch_streaming_fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenProcessBatchStreaming(): codegen'd " -// "ProcessBatchStreaming() function failed verification, see log"); -// } -// -// codegen->AddFunctionToJit(process_batch_streaming_fn, -// reinterpret_cast(&process_batch_streaming_fn_)); -// return Status::OK(); -//} - -#endif - // Instantiate required templates. template Status NewPartitionedAggregationNode::AppendSpilledRow( Partition*, TupleRow*); diff --git a/be/src/exec/new_partitioned_aggregation_node.h b/be/src/exec/new_partitioned_aggregation_node.h index ac30c298d9..62a3da441e 100644 --- a/be/src/exec/new_partitioned_aggregation_node.h +++ b/be/src/exec/new_partitioned_aggregation_node.h @@ -30,19 +30,11 @@ #include "runtime/mem_pool.h" #include "runtime/string_value.h" -namespace llvm { -// class BasicBlock; -class Function; -// class Value; -} - namespace doris { class AggFn; class NewAggFnEvaluator; class CodegenAnyVal; -//class LlvmCodeGen; -//class LlvmBuilder; class RowBatch; class RuntimeState; struct StringValue; @@ -139,8 +131,6 @@ class NewPartitionedAggregationNode : public ExecNode { virtual Status reset(RuntimeState* state); virtual Status close(RuntimeState* state); - static const char* LLVM_CLASS_NAME; - protected: /// Frees local allocations from aggregate_evals_ and agg_fn_evals // virtual Status QueryMaintenance(RuntimeState* state); @@ -681,37 +671,6 @@ class NewPartitionedAggregationNode : public ExecNode { void CleanupHashTbl(const std::vector& agg_fn_evals, NewPartitionedHashTable::Iterator it); - /// Codegen UpdateSlot(). Returns non-OK status if codegen is unsuccessful. - /// Assumes is_merge = false; -// Status CodegenUpdateSlot(LlvmCodeGen* codegen, NewAggFnEvaluator* evaluator, -// int evaluator_idx, SlotDescriptor* slot_desc, llvm::Function** fn); - - /// Codegen a call to a function implementing the UDA interface with input values - /// from 'input_vals'. 'dst_val' should contain the previous value of the aggregate - /// function, and 'updated_dst_val' is set to the new value after the Update or Merge - /// operation is applied. The instruction sequence for the UDA call is inserted at - /// the insert position of 'builder'. -// Status CodegenCallUda(LlvmCodeGen* codegen, LlvmBuilder* builder, -// NewAggFnEvaluator* evaluator, llvm::Value* agg_fn_ctx_arg, -// const std::vector& input_vals, const CodegenAnyVal& dst_val, -// CodegenAnyVal* updated_dst_val); - - /// Codegen UpdateTuple(). Returns non-OK status if codegen is unsuccessful. -// Status CodegenUpdateTuple(LlvmCodeGen* codegen, llvm::Function** fn); - - /// Codegen the non-streaming process row batch loop. The loop has already been - /// compiled to IR and loaded into the codegen object. UpdateAggTuple has also been - /// codegen'd to IR. This function will modify the loop subsituting the statically - /// compiled functions with codegen'd ones. 'process_batch_fn_' or - /// 'process_batch_no_grouping_fn_' will be updated with the codegened function - /// depending on whether this is a grouping or non-grouping aggregation. - /// Assumes AGGREGATED_ROWS = false. -// Status CodegenProcessBatch(LlvmCodeGen* codegen); - - /// Codegen the materialization loop for streaming preaggregations. - /// 'process_batch_streaming_fn_' will be updated with the codegened function. -// Status CodegenProcessBatchStreaming(LlvmCodeGen* codegen); - /// Compute minimum buffer reservation for grouping aggregations. /// We need one buffer per partition, which is used either as the write buffer for the /// aggregated stream or the unaggregated stream. We need an additional buffer to read diff --git a/be/src/exec/new_partitioned_hash_table.cc b/be/src/exec/new_partitioned_hash_table.cc index 7cf735d94a..0c195257c2 100644 --- a/be/src/exec/new_partitioned_hash_table.cc +++ b/be/src/exec/new_partitioned_hash_table.cc @@ -21,8 +21,6 @@ #include #include -#include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exec/exec_node.h" #include "exprs/expr.h" #include "exprs/expr_context.h" @@ -37,13 +35,10 @@ #include "common/names.h" using namespace doris; -// using namespace llvm; using namespace strings; // DEFINE_bool(enable_quadratic_probing, true, "Enable quadratic probing hash table"); -const char* NewPartitionedHashTableCtx::LLVM_CLASS_NAME = "class.doris::NewPartitionedHashTableCtx"; - // Random primes to multiply the seed with. static uint32_t SEED_PRIMES[] = { 1, // First seed must be 1, level 0 is used by other operators in the fragment. @@ -623,642 +618,3 @@ string NewPartitionedHashTable::PrintStats() const { ss << "Resizes: " << num_resizes_ << std::endl; return ss.str(); } - -#if 0 - -// Helper function to store a value into the results buffer if the expr -// evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so -// we'll pick a more random value. -static void CodegenAssignNullValue( - LlvmCodeGen* codegen, LlvmBuilder* builder, Value* dst, const ColumnType& type) { - uint64_t fnv_seed = HashUtil::FNV_SEED; - - if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) { - Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr"); - Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len"); - Value* null_len = codegen->GetIntConstant(TYPE_INT, fnv_seed); - Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); - builder->CreateStore(null_ptr, dst_ptr); - builder->CreateStore(null_len, dst_len); - } else { - Value* null_value = NULL; - int byte_size = type.GetByteSize(); - // Get a type specific representation of fnv_seed - switch (type.type) { - case TYPE_BOOLEAN: - // In results, booleans are stored as 1 byte - dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->GetIntConstant(TYPE_TINYINT, fnv_seed); - break; - case TYPE_TIMESTAMP: { - // Cast 'dst' to 'i128*' - DCHECK_EQ(byte_size, 16); - PointerType* fnv_seed_ptr_type = - codegen->GetPtrType(Type::getIntNTy(codegen->context(), byte_size * 8)); - dst = builder->CreateBitCast(dst, fnv_seed_ptr_type); - null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); - break; - } - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_DECIMAL: - null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); - break; - case TYPE_FLOAT: { - // Don't care about the value, just the bit pattern - float fnv_seed_float = *reinterpret_cast(&fnv_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_float)); - break; - } - case TYPE_DOUBLE: { - // Don't care about the value, just the bit pattern - double fnv_seed_double = *reinterpret_cast(&fnv_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_double)); - break; - } - default: - DCHECK(false); - } - builder->CreateStore(null_value, dst); - } -} - -// Codegen for evaluating a tuple row over either build_expr_ctxs_ or probe_expr_ctxs_. -// For a group by with (big int, string) the IR looks like: -// -// define i1 @EvalProbeRow(%"class.impala::NewPartitionedHashTableCtx"* %this_ptr, -// %"class.impala::TupleRow"* %row, i8* %expr_values, i8* %expr_values_null) #34 { -// entry: -// %loc_addr = getelementptr i8, i8* %expr_values, i32 0 -// %loc = bitcast i8* %loc_addr to i64* -// %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"* -// inttoptr (i64 197737664 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %0 = extractvalue { i8, i64 } %result, 0 -// %is_null = trunc i8 %0 to i1 -// %1 = zext i1 %is_null to i8 -// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0 -// store i8 %1, i8* %null_byte_loc -// br i1 %is_null, label %null, label %not_null -// -// null: ; preds = %entry -// store i64 2166136261, i64* %loc -// br label %continue -// -// not_null: ; preds = %entry -// %val = extractvalue { i8, i64 } %result, 1 -// store i64 %val, i64* %loc -// br label %continue -// -// continue: ; preds = %not_null, %null -// %is_null_phi = phi i1 [ true, %null ], [ false, %not_null ] -// %has_null = or i1 false, %is_null_phi -// %loc_addr1 = getelementptr i8, i8* %expr_values, i32 8 -// %loc2 = bitcast i8* %loc_addr1 to %"struct.impala::StringValue"* -// %result6 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"* -// inttoptr (i64 197738048 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %2 = extractvalue { i64, i8* } %result6, 0 -// %is_null7 = trunc i64 %2 to i1 -// %3 = zext i1 %is_null7 to i8 -// %null_byte_loc8 = getelementptr i8, i8* %expr_values_null, i32 1 -// store i8 %3, i8* %null_byte_loc8 -// br i1 %is_null7, label %null3, label %not_null4 -// -// null3: ; preds = %continue -// %string_ptr = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %loc2, i32 0, i32 0 -// %string_len = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %loc2, i32 0, i32 1 -// store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr -// store i32 -2128831035, i32* %string_len -// br label %continue5 -// -// not_null4: ; preds = %continue -// %4 = extractvalue { i64, i8* } %result6, 0 -// %5 = ashr i64 %4, 32 -// %6 = trunc i64 %5 to i32 -// %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %6, 1 -// %result9 = extractvalue { i64, i8* } %result6, 1 -// %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result9, 0 -// store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"* %loc2 -// br label %continue5 -// -// continue5: ; preds = %not_null4, %null3 -// %is_null_phi10 = phi i1 [ true, %null3 ], [ false, %not_null4 ] -// %has_null11 = or i1 %has_null, %is_null_phi10 -// ret i1 %has_null11 -// } -// -// For each expr, we create 3 code blocks. The null, not null and continue blocks. -// Both the null and not null branch into the continue block. The continue block -// becomes the start of the next block for codegen (either the next expr or just the -// end of the function). -Status NewPartitionedHashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build, Function** fn) { - const vector& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_; - for (int i = 0; i < ctxs.size(); ++i) { - // Disable codegen for CHAR - if (ctxs[i]->root()->type().type == TYPE_CHAR) { - return Status::InternalError("NewPartitionedHashTableCtx::CodegenEvalRow(): CHAR NYI"); - } - } - - // Get types to generate function prototype - Type* this_type = codegen->GetType(NewPartitionedHashTableCtx::LLVM_CLASS_NAME); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = codegen->GetPtrType(this_type); - Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type); - LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow", - codegen->GetType(TYPE_BOOLEAN)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type())); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type())); - - LLVMContext& context = codegen->context(); - LlvmBuilder builder(context); - Value* args[4]; - *fn = prototype.GeneratePrototype(&builder, args); - Value* this_ptr = args[0]; - Value* row = args[1]; - Value* expr_values = args[2]; - Value* expr_values_null = args[3]; - Value* has_null = codegen->false_value(); - - // ctx_vector = &build_expr_ctxs_[0] / ctx_vector = &probe_expr_ctxs_[0] - Value* ctx_vector = codegen->CodegenCallFunction(&builder, build ? - IRFunction::HASH_TABLE_GET_BUILD_EXPR_CTX : - IRFunction::HASH_TABLE_GET_PROBE_EXPR_CTX, - this_ptr, "ctx_vector"); - - for (int i = 0; i < ctxs.size(); ++i) { - // TODO: refactor this to somewhere else? This is not hash table specific except for - // the null handling bit and would be used for anyone that needs to materialize a - // vector of exprs - // Convert result buffer to llvm ptr type - int offset = expr_values_cache_.expr_values_offsets(i); - Value* loc = builder.CreateInBoundsGEP( - NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc_addr"); - Value* llvm_loc = builder.CreatePointerCast( - loc, codegen->GetPtrType(ctxs[i]->root()->type()), "loc"); - - BasicBlock* null_block = BasicBlock::Create(context, "null", *fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", *fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", *fn); - - // Call expr - Function* expr_fn; - Status status = ctxs[i]->root()->GetCodegendComputeFn(codegen, &expr_fn); - if (!status.ok()) { - (*fn)->eraseFromParent(); // deletes function - *fn = NULL; - return Status::InternalError(Substitute( - "Problem with NewPartitionedHashTableCtx::CodegenEvalRow(): $0", status.GetDetail())); - } - - // Avoid bloating function by inlining too many exprs into it. - if (i >= LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) { - codegen->SetNoInline(expr_fn); - } - - Value* expr_ctx = codegen->CodegenArrayAt(&builder, ctx_vector, i, "expr_ctx"); - CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped( - codegen, &builder, ctxs[i]->root()->type(), expr_fn, {expr_ctx, row}, "result"); - Value* is_null = result.GetIsNull(); - - // Set null-byte result - Value* null_byte = builder.CreateZExt(is_null, codegen->GetType(TYPE_TINYINT)); - Value* llvm_null_byte_loc = builder.CreateInBoundsGEP( - NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc"); - builder.CreateStore(null_byte, llvm_null_byte_loc); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - if (!stores_nulls_) { - // hash table doesn't store nulls, no reason to keep evaluating exprs - builder.CreateRet(codegen->true_value()); - } else { - CodegenAssignNullValue(codegen, &builder, llvm_loc, ctxs[i]->root()->type()); - builder.CreateBr(continue_block); - } - - // Not null block - builder.SetInsertPoint(not_null_block); - result.ToNativePtr(llvm_loc); - builder.CreateBr(continue_block); - - // Continue block - builder.SetInsertPoint(continue_block); - if (stores_nulls_) { - // Update has_null - PHINode* is_null_phi = builder.CreatePHI(codegen->boolean_type(), 2, "is_null_phi"); - is_null_phi->addIncoming(codegen->true_value(), null_block); - is_null_phi->addIncoming(codegen->false_value(), not_null_block); - has_null = builder.CreateOr(has_null, is_null_phi, "has_null"); - } - } - builder.CreateRet(has_null); - - // Avoid inlining a large EvalRow() function into caller. - if (ctxs.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { - codegen->SetNoInline(*fn); - } - - *fn = codegen->FinalizeFunction(*fn); - if (*fn == NULL) { - return Status::InternalError("Codegen'd NewPartitionedHashTableCtx::EvalRow() function failed verification, " - "see log"); - } - return Status::OK(); -} - -// Codegen for hashing the current row. In the case with both string and non-string data -// (group by int_col, string_col), the IR looks like: -// -// define i32 @HashRow(%"class.impala::NewPartitionedHashTableCtx"* %this_ptr, i8* %expr_values, -// i8* %expr_values_null) #34 { -// entry: -// %seed = call i32 @_ZNK6impala12NewPartitionedHashTableCtx11GetHashSeedEv( -// %"class.impala::NewPartitionedHashTableCtx"* %this_ptr) -// %hash = call i32 @CrcHash8(i8* %expr_values, i32 8, i32 %seed) -// %loc_addr = getelementptr i8, i8* %expr_values, i32 8 -// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 1 -// %null_byte = load i8, i8* %null_byte_loc -// %is_null = icmp ne i8 %null_byte, 0 -// br i1 %is_null, label %null, label %not_null -// -// null: ; preds = %entry -// %str_null = call i32 @CrcHash16(i8* %loc_addr, i32 16, i32 %hash) -// br label %continue -// -// not_null: ; preds = %entry -// %str_val = bitcast i8* %loc_addr to %"struct.impala::StringValue"* -// %0 = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %str_val, i32 0, i32 0 -// %1 = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %str_val, i32 0, i32 1 -// %ptr = load i8*, i8** %0 -// %len = load i32, i32* %1 -// %string_hash = call i32 @IrCrcHash(i8* %ptr, i32 %len, i32 %hash) -// br label %continue -// -// continue: ; preds = %not_null, %null -// %hash_phi = phi i32 [ %string_hash, %not_null ], [ %str_null, %null ] -// ret i32 %hash_phi -// } -Status NewPartitionedHashTableCtx::CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, Function** fn) { - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - // Disable codegen for CHAR - if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) { - return Status::InternalError("NewPartitionedHashTableCtx::CodegenHashRow(): CHAR NYI"); - } - } - - // Get types to generate function prototype - Type* this_type = codegen->GetType(NewPartitionedHashTableCtx::LLVM_CLASS_NAME); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = codegen->GetPtrType(this_type); - - LlvmCodeGen::FnPrototype prototype( - codegen, (use_murmur ? "MurmurHashRow" : "HashRow"), codegen->GetType(TYPE_INT)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type())); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type())); - - LLVMContext& context = codegen->context(); - LlvmBuilder builder(context); - Value* args[3]; - *fn = prototype.GeneratePrototype(&builder, args); - Value* this_arg = args[0]; - Value* expr_values = args[1]; - Value* expr_values_null = args[2]; - - // Call GetHashSeed() to get seeds_[level_] - Value* seed = codegen->CodegenCallFunction(&builder, - IRFunction::HASH_TABLE_GET_HASH_SEED, this_arg, "seed"); - - Value* hash_result = seed; - const int var_result_offset = expr_values_cache_.var_result_offset(); - const int expr_values_bytes_per_row = expr_values_cache_.expr_values_bytes_per_row(); - if (var_result_offset == -1) { - // No variable length slots, just hash what is in 'expr_expr_values_cache_' - if (expr_values_bytes_per_row > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(expr_values_bytes_per_row) : - codegen->GetHashFunction(expr_values_bytes_per_row); - Value* len = codegen->GetIntConstant(TYPE_INT, expr_values_bytes_per_row); - hash_result = builder.CreateCall( - hash_fn, ArrayRef({expr_values, len, hash_result}), "hash"); - } - } else { - if (var_result_offset > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(var_result_offset) : - codegen->GetHashFunction(var_result_offset); - Value* len = codegen->GetIntConstant(TYPE_INT, var_result_offset); - hash_result = builder.CreateCall( - hash_fn, ArrayRef({expr_values, len, hash_result}), "hash"); - } - - // Hash string slots - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING - && build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue; - - BasicBlock* null_block = NULL; - BasicBlock* not_null_block = NULL; - BasicBlock* continue_block = NULL; - Value* str_null_result = NULL; - - int offset = expr_values_cache_.expr_values_offsets(i); - Value* llvm_loc = builder.CreateInBoundsGEP( - NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc_addr"); - - // If the hash table stores nulls, we need to check if the stringval - // evaluated to NULL - if (stores_nulls_) { - null_block = BasicBlock::Create(context, "null", *fn); - not_null_block = BasicBlock::Create(context, "not_null", *fn); - continue_block = BasicBlock::Create(context, "continue", *fn); - - Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(NULL, expr_values_null, - codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc"); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte"); - Value* is_null = builder.CreateICmpNE( - null_byte, codegen->GetIntConstant(TYPE_TINYINT, 0), "is_null"); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // For null, we just want to call the hash function on the portion of - // the data - builder.SetInsertPoint(null_block); - Function* null_hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(sizeof(StringValue)) : - codegen->GetHashFunction(sizeof(StringValue)); - Value* len = codegen->GetIntConstant(TYPE_INT, sizeof(StringValue)); - str_null_result = builder.CreateCall(null_hash_fn, - ArrayRef({llvm_loc, len, hash_result}), "str_null"); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(not_null_block); - } - - // Convert expr_values_buffer_ loc to llvm value - Value* str_val = builder.CreatePointerCast(llvm_loc, - codegen->GetPtrType(TYPE_STRING), "str_val"); - - Value* ptr = builder.CreateStructGEP(NULL, str_val, 0); - Value* len = builder.CreateStructGEP(NULL, str_val, 1); - ptr = builder.CreateLoad(ptr, "ptr"); - len = builder.CreateLoad(len, "len"); - - // Call hash(ptr, len, hash_result); - Function* general_hash_fn = use_murmur ? codegen->GetMurmurHashFunction() : - codegen->GetHashFunction(); - Value* string_hash_result = builder.CreateCall(general_hash_fn, - ArrayRef({ptr, len, hash_result}), "string_hash"); - - if (stores_nulls_) { - builder.CreateBr(continue_block); - builder.SetInsertPoint(continue_block); - // Use phi node to reconcile that we could have come from the string-null - // path and string not null paths. - PHINode* phi_node = builder.CreatePHI(codegen->GetType(TYPE_INT), 2, "hash_phi"); - phi_node->addIncoming(string_hash_result, not_null_block); - phi_node->addIncoming(str_null_result, null_block); - hash_result = phi_node; - } else { - hash_result = string_hash_result; - } - } - } - - builder.CreateRet(hash_result); - - // Avoid inlining into caller if there are many exprs. - if (build_expr_ctxs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { - codegen->SetNoInline(*fn); - } - *fn = codegen->FinalizeFunction(*fn); - if (*fn == NULL) { - return Status::InternalError( - "Codegen'd NewPartitionedHashTableCtx::HashRow() function failed verification, see log"); - } - return Status::OK(); -} - -// Codegen for NewPartitionedHashTableCtx::Equals. For a group by with (bigint, string), -// the IR looks like: -// -// define i1 @Equals(%"class.impala::NewPartitionedHashTableCtx"* %this_ptr, %"class.impala::TupleRow"* -// %row, -// i8* %expr_values, i8* %expr_values_null) #34 { -// entry: -// %0 = alloca { i64, i8* } -// %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"* -// inttoptr (i64 139107136 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %1 = extractvalue { i8, i64 } %result, 0 -// %is_null = trunc i8 %1 to i1 -// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0 -// %2 = load i8, i8* %null_byte_loc -// %3 = icmp ne i8 %2, 0 -// %loc = getelementptr i8, i8* %expr_values, i32 0 -// %row_val = bitcast i8* %loc to i64* -// br i1 %is_null, label %null, label %not_null -// -// false_block: ; preds = %cmp9, %not_null2, %null1, -// %cmp, %not_null, %null -// ret i1 false -// -// null: ; preds = %entry -// br i1 %3, label %continue, label %false_block -// -// not_null: ; preds = %entry -// br i1 %3, label %false_block, label %cmp -// -// continue: ; preds = %cmp, %null -// %result4 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"* -// inttoptr (i64 139107328 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %4 = extractvalue { i64, i8* } %result4, 0 -// %is_null5 = trunc i64 %4 to i1 -// %null_byte_loc6 = getelementptr i8, i8* %expr_values_null, i32 1 -// %5 = load i8, i8* %null_byte_loc6 -// %6 = icmp ne i8 %5, 0 -// %loc7 = getelementptr i8, i8* %expr_values, i32 8 -// %row_val8 = bitcast i8* %loc7 to %"struct.impala::StringValue"* -// br i1 %is_null5, label %null1, label %not_null2 -// -// cmp: ; preds = %not_null -// %7 = load i64, i64* %row_val -// %val = extractvalue { i8, i64 } %result, 1 -// %cmp_raw = icmp eq i64 %val, %7 -// br i1 %cmp_raw, label %continue, label %false_block -// -// null1: ; preds = %continue -// br i1 %6, label %continue3, label %false_block -// -// not_null2: ; preds = %continue -// br i1 %6, label %false_block, label %cmp9 -// -// continue3: ; preds = %cmp9, %null1 -// ret i1 true -// -// cmp9: ; preds = %not_null2 -// store { i64, i8* } %result4, { i64, i8* }* %0 -// %8 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"* -// %cmp_raw10 = call i1 -// @_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE( -// %"struct.impala_udf::StringVal"* %8, %"struct.impala::StringValue"* %row_val8) -// br i1 %cmp_raw10, label %continue3, label %false_block -// } -Status NewPartitionedHashTableCtx::CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality, - Function** fn) { - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - // Disable codegen for CHAR - if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) { - return Status::InternalError("NewPartitionedHashTableCtx::CodegenEquals(): CHAR NYI"); - } - } - - // Get types to generate function prototype - Type* this_type = codegen->GetType(NewPartitionedHashTableCtx::LLVM_CLASS_NAME); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = codegen->GetPtrType(this_type); - Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type); - - LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->GetType(TYPE_BOOLEAN)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type())); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type())); - - LLVMContext& context = codegen->context(); - LlvmBuilder builder(context); - Value* args[4]; - *fn = prototype.GeneratePrototype(&builder, args); - Value* this_ptr = args[0]; - Value* row = args[1]; - Value* expr_values = args[2]; - Value* expr_values_null = args[3]; - - // ctx_vector = &build_expr_ctxs_[0] - Value* ctx_vector = codegen->CodegenCallFunction(&builder, - IRFunction::HASH_TABLE_GET_BUILD_EXPR_CTX, this_ptr, "ctx_vector"); - - BasicBlock* false_block = BasicBlock::Create(context, "false_block", *fn); - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - BasicBlock* null_block = BasicBlock::Create(context, "null", *fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", *fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", *fn); - - // call GetValue on build_exprs[i] - Function* expr_fn; - Status status = build_expr_ctxs_[i]->root()->GetCodegendComputeFn(codegen, &expr_fn); - if (!status.ok()) { - (*fn)->eraseFromParent(); // deletes function - *fn = NULL; - return Status::InternalError( - Substitute("Problem with NewPartitionedHashTableCtx::CodegenEquals: $0", status.GetDetail())); - } - if (build_expr_ctxs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) { - // Avoid bloating function by inlining too many exprs into it. - codegen->SetNoInline(expr_fn); - } - - // Load ExprContext*: expr_ctx = ctx_vector[i]; - Value* expr_ctx = codegen->CodegenArrayAt(&builder, ctx_vector, i, "expr_ctx"); - - // Evaluate the expression. - CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder, - build_expr_ctxs_[i]->root()->type(), expr_fn, {expr_ctx, row}, "result"); - Value* is_null = result.GetIsNull(); - - // Determine if row is null (i.e. expr_values_null[i] == true). In - // the case where the hash table does not store nulls, this is always false. - Value* row_is_null = codegen->false_value(); - - // We consider null values equal if we are comparing build rows or if the join - // predicate is <=> - if (force_null_equality || finds_nulls_[i]) { - Value* llvm_null_byte_loc = builder.CreateInBoundsGEP( - NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc"); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - row_is_null = - builder.CreateICmpNE(null_byte, codegen->GetIntConstant(TYPE_TINYINT, 0)); - } - - // Get llvm value for row_val from 'expr_values' - int offset = expr_values_cache_.expr_values_offsets(i); - Value* loc = builder.CreateInBoundsGEP( - NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc"); - Value* row_val = builder.CreatePointerCast( - loc, codegen->GetPtrType(build_expr_ctxs_[i]->root()->type()), "row_val"); - - // Branch for GetValue() returning NULL - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - builder.CreateCondBr(row_is_null, continue_block, false_block); - - // Not-null block - builder.SetInsertPoint(not_null_block); - if (stores_nulls_) { - BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", *fn); - // First need to compare that row expr[i] is not null - builder.CreateCondBr(row_is_null, false_block, cmp_block); - builder.SetInsertPoint(cmp_block); - } - // Check result == row_val - Value* is_equal = result.EqToNativePtr(row_val); - builder.CreateCondBr(is_equal, continue_block, false_block); - - builder.SetInsertPoint(continue_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - - // Avoid inlining into caller if it is large. - if (build_expr_ctxs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { - codegen->SetNoInline(*fn); - } - *fn = codegen->FinalizeFunction(*fn); - if (*fn == NULL) { - return Status::InternalError("Codegen'd NewPartitionedHashTableCtx::Equals() function failed verification, " - "see log"); - } - return Status::OK(); -} - -Status NewPartitionedHashTableCtx::ReplaceHashTableConstants(LlvmCodeGen* codegen, - bool stores_duplicates, int num_build_tuples, Function* fn, - HashTableReplacedConstants* replacement_counts) { - - replacement_counts->stores_nulls = codegen->ReplaceCallSitesWithBoolConst( - fn, stores_nulls(), "stores_nulls"); - replacement_counts->finds_some_nulls = codegen->ReplaceCallSitesWithBoolConst( - fn, finds_some_nulls(), "finds_some_nulls"); - replacement_counts->stores_tuples = codegen->ReplaceCallSitesWithBoolConst( - fn, num_build_tuples == 1, "stores_tuples"); - replacement_counts->stores_duplicates = codegen->ReplaceCallSitesWithBoolConst( - fn, stores_duplicates, "stores_duplicates"); - replacement_counts->quadratic_probing = codegen->ReplaceCallSitesWithBoolConst( - fn, FLAGS_enable_quadratic_probing, "quadratic_probing"); - return Status::OK(); -} - -#endif - diff --git a/be/src/exec/new_partitioned_hash_table.h b/be/src/exec/new_partitioned_hash_table.h index fda89dbff6..e4faf7a95a 100644 --- a/be/src/exec/new_partitioned_hash_table.h +++ b/be/src/exec/new_partitioned_hash_table.h @@ -33,15 +33,10 @@ #include "util/bitmap.h" #include "util/hash_util.hpp" -namespace llvm { - class Function; -} - namespace doris { class Expr; class ExprContext; -class LlvmCodeGen; class MemTracker; class RowDescriptor; class RuntimeState; @@ -169,24 +164,6 @@ class NewPartitionedHashTableCtx { bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row); bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row); - /// Codegen for evaluating a tuple row. Codegen'd function matches the signature - /// for EvalBuildRow and EvalTupleRow. - /// If build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs. - Status CodegenEvalRow(LlvmCodeGen* codegen, bool build_row, llvm::Function** fn); - - /// Codegen for evaluating a TupleRow and comparing equality. Function signature - /// matches HashTable::Equals(). 'force_null_equality' is true if the generated - /// equality function should treat all NULLs as equal. See the template parameter - /// to HashTable::Equals(). - Status CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality, - llvm::Function** fn); - - /// Codegen for hashing expr values. Function prototype matches HashRow identically. - /// Unlike HashRow(), the returned function only uses a single hash function, rather - /// than switching based on level_. If 'use_murmur' is true, murmur hash is used, - /// otherwise CRC is used if the hardware supports it (see hash-util.h). - Status CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, llvm::Function** fn); - /// Struct that returns the number of constants replaced by ReplaceConstants(). struct HashTableReplacedConstants { int stores_nulls; @@ -196,15 +173,6 @@ class NewPartitionedHashTableCtx { int quadratic_probing; }; - /// Replace hash table parameters with constants in 'fn'. Updates 'replacement_counts' - /// with the number of replacements made. 'num_build_tuples' and 'stores_duplicates' - /// correspond to HashTable parameters with the same name. - Status ReplaceHashTableConstants(LlvmCodeGen* codegen, bool stores_duplicates, - int num_build_tuples, llvm::Function* fn, - HashTableReplacedConstants* replacement_counts); - - static const char* LLVM_CLASS_NAME; - /// To enable prefetching, the hash table building and probing are pipelined by the /// exec nodes. A set of rows in a row batch will be evaluated and hashed first and /// the corresponding hash table buckets are prefetched before they are probed against @@ -483,11 +451,6 @@ class NewPartitionedHashTableCtx { bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; } bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; } - /// Cross-compiled function to access the build/probe expression context. - /// Called by generated LLVM IR functions such as Equals() and EvalRow(). - ExprContext* const* IR_ALWAYS_INLINE build_expr_evals() const; - ExprContext* const* IR_ALWAYS_INLINE probe_expr_evals() const; - const std::vector& build_exprs_; std::vector build_expr_evals_; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index db769c6ba4..921b539b49 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -24,7 +24,6 @@ #include #include -#include "codegen/llvm_codegen.h" #include "common/logging.h" #include "exprs/expr.h" #include "exprs/binary_predicate.h" @@ -42,8 +41,6 @@ #include "common/resource_tls.h" #include -using llvm::Function; - namespace doris { #define DS_SUCCESS(x) ((x) >= 0) @@ -178,17 +175,6 @@ Status OlapScanNode::prepare(RuntimeState* state) { _string_slots.push_back(slots[i]); } - if (state->codegen_level() > 0) { - LlvmCodeGen* codegen = NULL; - RETURN_IF_ERROR(state->get_codegen(&codegen)); - Function* codegen_eval_conjuncts_fn = codegen_eval_conjuncts(state, _conjunct_ctxs); - if (codegen_eval_conjuncts_fn != NULL) { - codegen->add_function_to_jit(codegen_eval_conjuncts_fn, - reinterpret_cast(&_eval_conjuncts_fn)); - // AddRuntimeExecOption("Probe Side Codegen Enabled"); - } - } - _runtime_state = state; return Status::OK(); } diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index a9bc315f64..8d31a9074a 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -21,8 +21,6 @@ #include #include -#include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exec/partitioned_hash_table.inline.h" #include "exprs/agg_fn_evaluator.h" #include "exprs/expr.h" @@ -43,14 +41,10 @@ #include "gen_cpp/Exprs_types.h" #include "gen_cpp/PlanNodes_types.h" -// using namespace llvm; using std::list; namespace doris { -const char* PartitionedAggregationNode::_s_llvm_class_name = - "class.doris::PartitionedAggregationNode"; - PartitionedAggregationNode::PartitionedAggregationNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), @@ -93,14 +87,6 @@ Status PartitionedAggregationNode::init(const TPlanNode& tnode, RuntimeState* st Status PartitionedAggregationNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - // Create the codegen object before preparing _conjunct_ctxs and _children, so that any - // ScalarFnCalls will use codegen. - // TODO: this is brittle and hard to reason about, revisit - // if (state->codegen_enabled()) { - // LlvmCodeGen* codegen; - // RETURN_IF_ERROR(state->get_codegen(&codegen)); - // } - RETURN_IF_ERROR(ExecNode::prepare(state)); _state = state; @@ -201,16 +187,6 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { DCHECK(_serialize_stream->has_write_block()); } - // if (state->codegen_enabled()) { - // LlvmCodeGen* codegen; - // RETURN_IF_ERROR(state->get_codegen(&codegen)); - // Function* codegen_process_row_batch_fn = codegen_process_batch(); - // if (codegen_process_row_batch_fn != NULL) { - // codegen->AddFunctionToJit(codegen_process_row_batch_fn, - // reinterpret_cast(&_process_row_batch_fn)); - // add_runtime_exec_option("Codegen Enabled"); - // } - // } return Status::OK(); } @@ -1111,461 +1087,7 @@ void PartitionedAggregationNode::close_partitions() { // } // return ExecNode::QueryMaintenance(state); // } - -// IR Generation for updating a single aggregation slot. Signature is: -// void UpdateSlot(FunctionContext* fn_ctx, AggTuple* agg_tuple, char** row) // -// The IR for sum(double_col) is: -// define void @UpdateSlot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, double }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 128241264 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %dst_val = load double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 -// %1 = fadd double %dst_val, %val -// store double %1, double* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -// -// The IR for ndv(double_col) is: -// define void @UpdateSlot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, %"struct.doris::StringValue" }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %dst_lowered_ptr = alloca { i64, i8* } -// %src_lowered_ptr = alloca { i8, double } -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 120530832 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds -// { i8, %"struct.doris::StringValue" }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, %"struct.doris::StringValue" }* %agg_tuple) -// %dst_val = load %"struct.doris::StringValue"* %dst_slot_ptr -// store { i8, double } %src, { i8, double }* %src_lowered_ptr -// %src_unlowered_ptr = bitcast { i8, double }* %src_lowered_ptr -// to %"struct.doris_udf::DoubleVal"* -// %ptr = extractvalue %"struct.doris::StringValue" %dst_val, 0 -// %dst_stringval = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1 -// %len = extractvalue %"struct.doris::StringValue" %dst_val, 1 -// %1 = extractvalue { i64, i8* } %dst_stringval, 0 -// %2 = zext i32 %len to i64 -// %3 = shl i64 %2, 32 -// %4 = and i64 %1, 4294967295 -// %5 = or i64 %4, %3 -// %dst_stringval1 = insertvalue { i64, i8* } %dst_stringval, i64 %5, 0 -// store { i64, i8* } %dst_stringval1, { i64, i8* }* %dst_lowered_ptr -// %dst_unlowered_ptr = bitcast { i64, i8* }* %dst_lowered_ptr -// to %"struct.doris_udf::StringVal"* -// call void @HllUpdate(%"class.doris_udf::FunctionContext"* %fn_ctx, -// %"struct.doris_udf::DoubleVal"* %src_unlowered_ptr, -// %"struct.doris_udf::StringVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i8* }* %dst_lowered_ptr -// %6 = extractvalue { i64, i8* } %anyval_result, 1 -// %7 = insertvalue %"struct.doris::StringValue" zeroinitializer, i8* %6, 0 -// %8 = extractvalue { i64, i8* } %anyval_result, 0 -// %9 = ashr i64 %8, 32 -// %10 = trunc i64 %9 to i32 -// %11 = insertvalue %"struct.doris::StringValue" %7, i32 %10, 1 -// store %"struct.doris::StringValue" %11, %"struct.doris::StringValue"* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -llvm::Function* PartitionedAggregationNode::codegen_update_slot( - AggFnEvaluator* evaluator, SlotDescriptor* slot_desc) { - DCHECK(slot_desc->is_materialized()); - LlvmCodeGen* codegen = NULL; - if (!_state->get_codegen(&codegen).ok()) { - return NULL; - } - - DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1); - ExprContext* input_expr_ctx = evaluator->input_expr_ctxs()[0]; - Expr* input_expr = input_expr_ctx->root(); - - // TODO: implement timestamp - // if (input_expr->type().type == TYPE_TIMESTAMP && - // evaluator->agg_op() != AggFnEvaluator::AVG) { - // return NULL; - // } - - Function* agg_expr_fn = NULL; - Status status = input_expr->get_codegend_compute_fn(_state, &agg_expr_fn); - if (!status.ok()) { - VLOG_QUERY << "Could not codegen UpdateSlot(): " << status.get_error_msg(); - return NULL; - } - DCHECK(agg_expr_fn != NULL); - - PointerType* fn_ctx_type = - codegen->get_ptr_type(FunctionContextImpl::_s_llvm_functioncontext_name); - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - if (tuple_struct == NULL) return NULL; // Could not generate tuple struct - PointerType* tuple_ptr_type = PointerType::get(tuple_struct, 0); - PointerType* tuple_row_ptr_type = codegen->get_ptr_type(TupleRow::_s_llvm_class_name); - - // Create UpdateSlot prototype - LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - Value* fn_ctx_arg = args[0]; - Value* agg_tuple_arg = args[1]; - Value* row_arg = args[2]; - - BasicBlock* src_not_null_block = - BasicBlock::create(codegen->context(), "src_not_null", fn); - BasicBlock* ret_block = BasicBlock::create(codegen->context(), "ret", fn); - - // Call expr function to get src slot value - Value* expr_ctx = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), input_expr_ctx); - Value* agg_expr_fn_args[] = { expr_ctx, row_arg }; - CodegenAnyVal src = CodegenAnyVal::create_call_wrapped( - codegen, &builder, input_expr->type(), agg_expr_fn, agg_expr_fn_args, "src"); - - Value* src_is_null = src.get_is_null(); - builder.create_cond_br(src_is_null, ret_block, src_not_null_block); - - // Src slot is not null, update dst_slot - builder.set_insert_point(src_not_null_block); - Value* dst_ptr = - builder.create_struct_gep(agg_tuple_arg, slot_desc->field_idx(), "dst_slot_ptr"); - Value* result = NULL; - - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->CodegenUpdateNull(codegen, tuple_struct, false); - builder.CreateCall(clear_null_fn, agg_tuple_arg); - } - - // Update the slot - Value* dst_value = builder.CreateLoad(dst_ptr, "dst_val"); - switch (evaluator->agg_op()) { - case AggFnEvaluator::COUNT: - if (evaluator->is_merge()) { - result = builder.CreateAdd(dst_value, src.GetVal(), "count_sum"); - } else { - result = builder.CreateAdd(dst_value, - codegen->get_int_constant(TYPE_BIGINT, 1), "count_inc"); - } - break; - case AggFnEvaluator::MIN: { - Function* min_fn = codegen->CodegenMinMax(slot_desc->type(), true); - Value* min_args[] = { dst_value, src.GetVal() }; - result = builder.CreateCall(min_fn, min_args, "min_value"); - break; - } - case AggFnEvaluator::MAX: { - Function* max_fn = codegen->CodegenMinMax(slot_desc->type(), false); - Value* max_args[] = { dst_value, src.GetVal() }; - result = builder.CreateCall(max_fn, max_args, "max_value"); - break; - } - case AggFnEvaluator::SUM: - if (slot_desc->type().type != TYPE_DECIMAL && slot_desc->type().type != TYPE_DECIMALV2) { - if (slot_desc->type().type == TYPE_FLOAT || - slot_desc->type().type == TYPE_DOUBLE) { - result = builder.CreateFAdd(dst_value, src.GetVal()); - } else { - result = builder.CreateAdd(dst_value, src.GetVal()); - } - break; - } - DCHECK(slot_desc->type().type == TYPE_DECIMAL || slot_desc->type().type == TYPE_DECIMALV2); - // Fall through to xcompiled case - case AggFnEvaluator::AVG: - case AggFnEvaluator::NDV: { - // Get xcompiled update/merge function from IR module - const string& symbol = evaluator->is_merge() ? - evaluator->merge_symbol() : evaluator->update_symbol(); - Function* ir_fn = codegen->module()->getFunction(symbol); - DCHECK(ir_fn != NULL); - - // Create pointer to src to pass to ir_fn. We must use the unlowered type. - Value* src_lowered_ptr = codegen->CreateEntryBlockAlloca( - fn, LlvmCodeGen::NamedVariable("src_lowered_ptr", src.value()->getType())); - builder.CreateStore(src.value(), src_lowered_ptr); - Type* unlowered_ptr_type = - CodegenAnyVal::GetUnloweredPtrType(codegen, input_expr->type()); - Value* src_unlowered_ptr = - builder.CreateBitCast(src_lowered_ptr, unlowered_ptr_type, "src_unlowered_ptr"); - - // Create intermediate argument 'dst' from 'dst_value' - const ColumnType& dst_type = evaluator->intermediate_type(); - CodegenAnyVal dst = CodegenAnyVal::GetNonNullVal( - codegen, &builder, dst_type, "dst"); - dst.SetFromRawValue(dst_value); - // Create pointer to dst to pass to ir_fn. We must use the unlowered type. - Value* dst_lowered_ptr = codegen->CreateEntryBlockAlloca( - fn, LlvmCodeGen::NamedVariable("dst_lowered_ptr", dst.value()->getType())); - builder.CreateStore(dst.value(), dst_lowered_ptr); - unlowered_ptr_type = CodegenAnyVal::GetUnloweredPtrType(codegen, dst_type); - Value* dst_unlowered_ptr = - builder.CreateBitCast(dst_lowered_ptr, unlowered_ptr_type, "dst_unlowered_ptr"); - - // Call 'ir_fn' - builder.CreateCall3(ir_fn, fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr); - - // Convert StringVal intermediate 'dst_arg' back to StringValue - Value* anyval_result = builder.CreateLoad(dst_lowered_ptr, "anyval_result"); - result = CodegenAnyVal(codegen, &builder, dst_type, anyval_result).ToNativeValue(); - break; - } - default: - DCHECK(false) << "bad aggregate operator: " << evaluator->agg_op(); - } - - builder.CreateStore(result, dst_ptr); - builder.CreateBr(ret_block); - - builder.SetInsertPoint(ret_block); - builder.CreateRetVoid(); - - return codegen->FinalizeFunction(fn); -} - -// IR codegen for the update_tuple loop. This loop is query specific and based on the -// aggregate functions. The function signature must match the non- codegen'd update_tuple -// exactly. -// For the query: -// select count(*), count(int_col), sum(double_col) the IR looks like: -// - -// ; Function Attrs: alwaysinline -// define void @update_tuple(%"class.doris::PartitionedAggregationNode"* %this_ptr, -// %"class.doris_udf::FunctionContext"** %agg_fn_ctxs, -// %"class.doris::Tuple"* %tuple, -// %"class.doris::TupleRow"* %row, -// i1 %is_merge) #20 { -// entry: -// %tuple1 = bitcast %"class.doris::Tuple"* %tuple to { i8, i64, i64, double }* -// %src_slot = getelementptr inbounds { i8, i64, i64, double }* %tuple1, i32 0, i32 1 -// %count_star_val = load i64* %src_slot -// %count_star_inc = add i64 %count_star_val, 1 -// store i64 %count_star_inc, i64* %src_slot -// %0 = getelementptr %"class.doris_udf::FunctionContext"** %agg_fn_ctxs, i32 1 -// %fn_ctx = load %"class.doris_udf::FunctionContext"** %0 -// call void @UpdateSlot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, i64, i64, double }* %tuple1, -// %"class.doris::TupleRow"* %row) -// %1 = getelementptr %"class.doris_udf::FunctionContext"** %agg_fn_ctxs, i32 2 -// %fn_ctx2 = load %"class.doris_udf::FunctionContext"** %1 -// call void @UpdateSlot5(%"class.doris_udf::FunctionContext"* %fn_ctx2, -// { i8, i64, i64, double }* %tuple1, -// %"class.doris::TupleRow"* %row) -// ret void -// } -Function* PartitionedAggregationNode::codegen_update_tuple() { - LlvmCodeGen* codegen = NULL; - if (!_state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - - int j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - - // Don't codegen things that aren't builtins (for now) - if (!evaluator->is_builtin()) { - return NULL; - } - - bool supported = true; - AggFnEvaluator::AggregationOp op = evaluator->agg_op(); - PrimitiveType type = slot_desc->type().type; - // Char and timestamp intermediates aren't supported - if (type == TYPE_TIMESTAMP || type == TYPE_CHAR) { - supported = false; - } - // Only AVG and NDV support string intermediates - if ((type == TYPE_STRING || type == TYPE_VARCHAR) && - !(op == AggFnEvaluator::AVG || op == AggFnEvaluator::NDV)) { - supported = false; - } - // Only SUM, AVG, and NDV support decimal intermediates - if (type == TYPE_DECIMAL && - !(op == AggFnEvaluator::SUM || op == AggFnEvaluator::AVG || - op == AggFnEvaluator::NDV)) { - supported = false; - } - if (type == TYPE_DECIMALV2 && - !(op == AggFnEvaluator::SUM || op == AggFnEvaluator::AVG || - op == AggFnEvaluator::NDV)) { - supported = false; - } - if (!supported) { - VLOG_QUERY << "Could not codegen update_tuple because intermediate type " - << slot_desc->type() - << " is not yet supported for aggregate function \"" - << evaluator->fn_name() << "()\""; - return NULL; - } - } - - if (_intermediate_tuple_desc->generate_llvm_struct(codegen) == NULL) { - VLOG_QUERY << "Could not codegen update_tuple because we could" - << "not generate a matching llvm struct for the intermediate tuple."; - return NULL; - } - - // Get the types to match the update_tuple signature - Type* agg_node_type = codegen->get_type(PartitionedAggregationNode::_s_llvm_class_name); - Type* fn_ctx_type = codegen->get_type(FunctionContextImpl::_s_llvm_functioncontext_name); - Type* tuple_type = codegen->get_type(Tuple::_s_llvm_class_name); - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - - PointerType* agg_node_ptr_type = agg_node_type->getPointerTo(); - PointerType* fn_ctx_ptr_ptr_type = fn_ctx_type->getPointerTo()->getPointerTo(); - PointerType* tuple_ptr_type = tuple_type->getPointerTo(); - PointerType* tuple_row_ptr_type = tuple_row_type->getPointerTo(); - - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - PointerType* tuple_ptr = PointerType::get(tuple_struct, 0); - LlvmCodeGen::FnPrototype prototype(codegen, "update_tuple", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_fn_ctxs", fn_ctx_ptr_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("is_merge", codegen->boolean_type())); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[5]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - - Value* agg_fn_ctxs_arg = args[1]; - Value* tuple_arg = args[2]; - Value* row_arg = args[3]; - - // Cast the parameter types to the internal llvm runtime types. - // TODO: get rid of this by using right type in function signature - tuple_arg = builder.CreateBitCast(tuple_arg, tuple_ptr, "tuple"); - - // Loop over each expr and generate the IR for that slot. If the expr is not - // count(*), generate a helper IR function to update the slot and call that. - j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - if (evaluator->is_count_star()) { - // TODO: we should be able to hoist this up to the loop over the batch and just - // increment the slot by the number of rows in the batch. - int field_idx = slot_desc->field_idx(); - Value* const_one = codegen->get_int_constant(TYPE_BIGINT, 1); - Value* slot_ptr = builder.create_struct_gep(tuple_arg, field_idx, "src_slot"); - Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val"); - Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc"); - builder.CreateStore(count_inc, slot_ptr); - } else { - Function* update_slot_fn = codegen_update_slot(evaluator, slot_desc); - if (update_slot_fn == NULL) return NULL; - Value* fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i); - Value* fn_ctx = builder.CreateLoad(fn_ctx_ptr, "fn_ctx"); - builder.CreateCall3(update_slot_fn, fn_ctx, tuple_arg, row_arg); - } - } - builder.CreateRetVoid(); - - // codegen_process_batch() does the final optimizations. - return codegen->FinalizeFunction(fn); -} - -Function* PartitionedAggregationNode::codegen_process_batch() { - LlvmCodeGen* codegen = NULL; - if (!_state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - - Function* update_tuple_fn = codegen_update_tuple(); - if (update_tuple_fn == NULL) { - return NULL; - } - - // Get the cross compiled update row batch function - IRFunction::Type ir_fn = (!_probe_expr_ctxs.empty() ? - IRFunction::PART_AGG_NODE_PROCESS_BATCH_FALSE : - IRFunction::PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING); - Function* process_batch_fn = codegen->get_function(ir_fn); - DCHECK(process_batch_fn != NULL); - - int replaced = 0; - if (!_probe_expr_ctxs.empty()) { - // Aggregation w/o grouping does not use a hash table. - - // Codegen for hash - // The codegen'd process_batch function is only used in open() with _level = 0, - // so don't use murmur hash - Function* hash_fn = _ht_ctx->codegen_hash_current_row(_state, /* use murmur */ false); - if (hash_fn == NULL) { - return NULL; - } - - // Codegen PartitionedHashTable::Equals - Function* equals_fn = _ht_ctx->codegen_equals(_state); - if (equals_fn == NULL) { - return NULL; - } - - // Codegen for evaluating probe rows - Function* eval_probe_row_fn = _ht_ctx->codegen_eval_row(_state, false); - if (eval_probe_row_fn == NULL) { - return NULL; - } - - // Replace call sites - process_batch_fn = codegen->replace_call_sites(process_batch_fn, false, - eval_probe_row_fn, "EvalProbeRow", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites(process_batch_fn, true, - hash_fn, "HashCurrentRow", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites(process_batch_fn, true, - equals_fn, "Equals", &replaced); - DCHECK_EQ(replaced, 1); - } - - process_batch_fn = codegen->replace_call_sites(process_batch_fn, false, - update_tuple_fn, "update_tuple", &replaced); - DCHECK_GE(replaced, 1); - DCHECK(process_batch_fn != NULL); - return codegen->optimize_function_with_exprs(process_batch_fn); -} #endif } diff --git a/be/src/exec/partitioned_aggregation_node.h b/be/src/exec/partitioned_aggregation_node.h index d963b8a74d..bbcc3e0e76 100644 --- a/be/src/exec/partitioned_aggregation_node.h +++ b/be/src/exec/partitioned_aggregation_node.h @@ -29,14 +29,9 @@ #include "runtime/mem_pool.h" #include "runtime/string_value.h" -namespace llvm { - class Function; -} - namespace doris { class AggFnEvaluator; -class LlvmCodeGen; class RowBatch; class RuntimeState; struct StringValue; @@ -109,8 +104,6 @@ public: // virtual void close(RuntimeState* state); virtual Status close(RuntimeState* state); - static const char* _s_llvm_class_name; - protected: // Frees local allocations from _aggregate_evaluators and agg_fn_ctxs // virtual Status QueryMaintenance(RuntimeState* state); @@ -453,20 +446,6 @@ private: void cleanup_hash_tbl(const std::vector& agg_fn_ctxs, PartitionedHashTable::Iterator it); - // Codegen UpdateSlot(). Returns NULL if codegen is unsuccessful. - // Assumes is_merge = false; - llvm::Function* codegen_update_slot(AggFnEvaluator* evaluator, SlotDescriptor* slot_desc); - - // Codegen update_tuple(). Returns NULL if codegen is unsuccessful. - llvm::Function* codegen_update_tuple(); - - // Codegen the process row batch loop. The loop has already been compiled to - // IR and loaded into the codegen object. UpdateAggTuple has also been - // codegen'd to IR. This function will modify the loop subsituting the statically - // compiled functions with codegen'd ones. - // Assumes AGGREGATED_ROWS = false. - llvm::Function* codegen_process_batch(); - // We need two buffers per partition, one for the aggregated stream and one // for the unaggregated stream. We need an additional buffer to read the stream // we are currently repartitioning. diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index e14bdd6b09..61d9d2e6e7 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -17,8 +17,6 @@ #include "exec/partitioned_hash_table.inline.h" -#include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "exprs/slot_ref.h" @@ -29,8 +27,6 @@ #include "runtime/string_value.hpp" #include "util/doris_metrics.h" -// using namespace llvm; - // DEFINE_bool(enable_quadratic_probing, true, "Enable quadratic probing hash table"); using std::string; @@ -38,20 +34,8 @@ using std::stringstream; using std::vector; using std::endl; -using llvm::BasicBlock; -using llvm::Value; -using llvm::Function; -using llvm::Type; -using llvm::PointerType; -using llvm::LLVMContext; -using llvm::PHINode; -using llvm::ConstantFP; -using llvm::APFloat; - namespace doris { -const char* PartitionedHashTableCtx::_s_llvm_class_name = "class.doris::PartitionedHashTableCtx"; - // Random primes to multiply the seed with. static uint32_t SEED_PRIMES[] = { 1, // First seed must be 1, level 0 is used by other operators in the fragment. @@ -452,494 +436,5 @@ string PartitionedHashTable::print_stats() const { return ss.str(); } -#if 0 -// Helper function to store a value into the results buffer if the expr -// evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so -// we'll pick a more random value. -static void codegen_assign_null_value(LlvmCodeGen* codegen, - // LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) { - LlvmCodeGen::LlvmBuilder* builder, Value* dst, const TypeDescriptor& type) { - int64_t fvn_seed = HashUtil::FNV_SEED; - - // if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) { - if (type.type == TYPE_VARCHAR) { - Value* dst_ptr = builder->CreateStructGEP(dst, 0, "string_ptr"); - Value* dst_len = builder->CreateStructGEP(dst, 1, "string_len"); - Value* null_len = codegen->get_int_constant(TYPE_INT, fvn_seed); - Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); - builder->CreateStore(null_ptr, dst_ptr); - builder->CreateStore(null_len, dst_len); - } else { - Value* null_value = NULL; - // Get a type specific representation of fvn_seed - switch (type.type) { - case TYPE_BOOLEAN: - // In results, booleans are stored as 1 byte - dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->get_int_constant(TYPE_TINYINT, fvn_seed); - break; - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - null_value = codegen->get_int_constant(type.type, fvn_seed); - break; - case TYPE_FLOAT: { - // Don't care about the value, just the bit pattern - float fvn_seed_float = *reinterpret_cast(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float)); - break; - } - case TYPE_DOUBLE: { - // Don't care about the value, just the bit pattern - double fvn_seed_double = *reinterpret_cast(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double)); - break; - } - default: - DCHECK(false); - } - builder->CreateStore(null_value, dst); - } -} - -// Codegen for evaluating a tuple row over either _build_expr_ctxs or _probe_expr_ctxs. -// For the case where we are joining on a single int, the IR looks like -// define i1 @EvalBuildRow(%"class.impala::PartitionedHashTableCtx"* %this_ptr, -// %"class.impala::TupleRow"* %row) #20 { -// entry: -// %result = call i64 @GetSlotRef1(%"class.impala::ExprContext"* inttoptr -// (i64 67971664 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %is_null = trunc i64 %result to i1 -// %0 = zext i1 %is_null to i8 -// store i8 %0, i8* inttoptr (i64 95753144 to i8*) -// br i1 %is_null, label %null, label %not_null -// -// null: ; preds = %entry -// store i32 -2128831035, i32* inttoptr (i64 95753128 to i32*) -// br label %continue -// -// not_null: ; preds = %entry -// %1 = ashr i64 %result, 32 -// %2 = trunc i64 %1 to i32 -// store i32 %2, i32* inttoptr (i64 95753128 to i32*) -// br label %continue -// -// continue: ; preds = %not_null, %null -// ret i1 true -// } -// For each expr, we create 3 code blocks. The null, not null and continue blocks. -// Both the null and not null branch into the continue block. The continue block -// becomes the start of the next block for codegen (either the next expr or just the -// end of the function). -Function* PartitionedHashTableCtx::codegen_eval_row(RuntimeState* state, bool build) { - // TODO: codegen_assign_null_value() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet - const vector& ctxs = build ? _build_expr_ctxs : _probe_expr_ctxs; - for (int i = 0; i < ctxs.size(); ++i) { - PrimitiveType type = ctxs[i]->root()->type().type; - // if (type == TYPE_TIMESTAMP || type == TYPE_DECIMAL || type == TYPE_CHAR) { - if (type == TYPE_DATETIME || type == TYPE_DATE - || type == TYPE_DECIMAL || type == TYPE_CHAR) { - return NULL; - } - } - - LlvmCodeGen* codegen; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(PartitionedHashTableCtx::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow", - codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - - Value* row = args[1]; - Value* has_null = codegen->false_value(); - - for (int i = 0; i < ctxs.size(); ++i) { - // TODO: refactor this to somewhere else? This is not hash table specific except for - // the null handling bit and would be used for anyone that needs to materialize a - // vector of exprs - // Convert result buffer to llvm ptr type - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ctxs[i]->root()->type()), loc); - - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // Call expr - Function* expr_fn; - Status status = ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - VLOG_QUERY << "Problem with codegen_eval_row: " << status.get_error_msg(); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, ctxs[i]->root()->type(), expr_fn, expr_fn_args, "result"); - Value* is_null = result.get_is_null(); - - // Set null-byte result - Value* null_byte = builder.CreateZExt(is_null, codegen->get_type(TYPE_TINYINT)); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - builder.CreateStore(null_byte, llvm_null_byte_loc); - - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - if (!_stores_nulls) { - // hash table doesn't store nulls, no reason to keep evaluating exprs - builder.CreateRet(codegen->true_value()); - } else { - codegen_assign_null_value(codegen, &builder, llvm_loc, ctxs[i]->root()->type()); - builder.CreateBr(continue_block); - } - - // Not null block - builder.SetInsertPoint(not_null_block); - result.ToNativePtr(llvm_loc); - builder.CreateBr(continue_block); - - // Continue block - builder.SetInsertPoint(continue_block); - if (_stores_nulls) { - // Update has_null - PHINode* is_null_phi = builder.CreatePHI(codegen->boolean_type(), 2, "is_null_phi"); - is_null_phi->addIncoming(codegen->true_value(), null_block); - is_null_phi->addIncoming(codegen->false_value(), not_null_block); - has_null = builder.CreateOr(has_null, is_null_phi, "has_null"); - } - } - builder.CreateRet(has_null); - - return codegen->FinalizeFunction(fn); -} - -// Codegen for hashing the current row. In the case with both string and non-string data -// (group by int_col, string_col), the IR looks like: -// define i32 @HashCurrentRow(%"class.impala::PartitionedHashTableCtx"* %this_ptr) #20 { -// entry: -// %seed = call i32 @get_hash_seed(%"class.impala::PartitionedHashTableCtx"* %this_ptr) -// %0 = call i32 @CrcHash16(i8* inttoptr (i64 119151296 to i8*), i32 16, i32 %seed) -// %1 = load i8* inttoptr (i64 119943721 to i8*) -// %2 = icmp ne i8 %1, 0 -// br i1 %2, label %null, label %not_null -// -// null: ; preds = %entry -// %3 = call i32 @CrcHash161(i8* inttoptr (i64 119151312 to i8*), i32 16, i32 %0) -// br label %continue -// -// not_null: ; preds = %entry -// %4 = load i8** getelementptr inbounds (%"struct.impala::StringValue"* inttoptr -// (i64 119151312 to %"struct.impala::StringValue"*), i32 0, i32 0) -// %5 = load i32* getelementptr inbounds (%"struct.impala::StringValue"* inttoptr -// (i64 119151312 to %"struct.impala::StringValue"*), i32 0, i32 1) -// %6 = call i32 @IrCrcHash(i8* %4, i32 %5, i32 %0) -// br label %continue -// -// continue: ; preds = %not_null, %null -// %7 = phi i32 [ %6, %not_null ], [ %3, %null ] -// call void @set_hash(%"class.impala::PartitionedHashTableCtx"* %this_ptr, i32 %7) -// ret i32 %7 -// } -Function* PartitionedHashTableCtx::codegen_hash_current_row(RuntimeState* state, bool use_murmur) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) return NULL; - } - - LlvmCodeGen* codegen; - if (!state->get_codegen(&codegen).ok()) return NULL; - - // Get types to generate function prototype - Type* this_type = codegen->get_type(PartitionedHashTableCtx::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, - (use_murmur ? "MurmurHashCurrentRow" : "HashCurrentRow"), - codegen->get_type(TYPE_INT)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* this_arg; - Function* fn = prototype.generate_prototype(&builder, &this_arg); - - // Call get_hash_seed() to get _seeds[_level] - Function* get_hash_seed_fn = codegen->GetFunction(IRFunction::HASH_TABLE_GET_HASH_SEED); - Value* seed = builder.CreateCall(get_hash_seed_fn, this_arg, "seed"); - - Value* hash_result = seed; - Value* data = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), _expr_values_buffer); - if (_var_result_begin == -1) { - // No variable length slots, just hash what is in '_expr_values_buffer' - if (_results_buffer_size > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(_results_buffer_size) : - codegen->GetHashFunction(_results_buffer_size); - Value* len = codegen->get_int_constant(TYPE_INT, _results_buffer_size); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result, "hash"); - } - } else { - if (_var_result_begin > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(_var_result_begin) : - codegen->GetHashFunction(_var_result_begin); - Value* len = codegen->get_int_constant(TYPE_INT, _var_result_begin); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result, "hash"); - } - - // Hash string slots - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // if (_build_expr_ctxs[i]->root()->type().type != TYPE_STRING - // && _build_expr_ctxs[i]->root()->type().type != TYPE_VARCHAR) continue; - if (_build_expr_ctxs[i]->root()->type().type != TYPE_VARCHAR) { - continue; - } - - BasicBlock* null_block = NULL; - BasicBlock* not_null_block = NULL; - BasicBlock* continue_block = NULL; - Value* str_null_result = NULL; - - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - - // If the hash table stores nulls, we need to check if the stringval - // evaluated to NULL - if (_stores_nulls) { - null_block = BasicBlock::Create(context, "null", fn); - not_null_block = BasicBlock::Create(context, "not_null", fn); - continue_block = BasicBlock::Create(context, "continue", fn); - - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte"); - Value* is_null = builder.CreateICmpNE(null_byte, - codegen->get_int_constant(TYPE_TINYINT, 0), "is_null"); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // For null, we just want to call the hash function on the portion of - // the data - builder.SetInsertPoint(null_block); - Function* null_hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(sizeof(StringValue)) : - codegen->GetHashFunction(sizeof(StringValue)); - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), loc); - Value* len = codegen->get_int_constant(TYPE_INT, sizeof(StringValue)); - str_null_result = - builder.CreateCall3(null_hash_fn, llvm_loc, len, hash_result, "str_null"); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(not_null_block); - } - - // Convert _expr_values_buffer loc to llvm value - // Value* str_val = codegen->cast_ptr_to_llvm_ptr(codegen->get_ptr_type(TYPE_STRING), loc); - Value* str_val = codegen->cast_ptr_to_llvm_ptr(codegen->get_ptr_type(TYPE_VARCHAR), loc); - - Value* ptr = builder.CreateStructGEP(str_val, 0); - Value* len = builder.CreateStructGEP(str_val, 1); - ptr = builder.CreateLoad(ptr, "ptr"); - len = builder.CreateLoad(len, "len"); - - // Call hash(ptr, len, hash_result); - Function* general_hash_fn = use_murmur ? codegen->GetMurmurHashFunction() : - codegen->GetHashFunction(); - Value* string_hash_result = - builder.CreateCall3(general_hash_fn, ptr, len, hash_result, "string_hash"); - - if (_stores_nulls) { - builder.CreateBr(continue_block); - builder.SetInsertPoint(continue_block); - // Use phi node to reconcile that we could have come from the string-null - // path and string not null paths. - PHINode* phi_node = builder.CreatePHI(codegen->get_type(TYPE_INT), 2, "hash_phi"); - phi_node->addIncoming(string_hash_result, not_null_block); - phi_node->addIncoming(str_null_result, null_block); - hash_result = phi_node; - } else { - hash_result = string_hash_result; - } - } - } - - builder.CreateRet(hash_result); - return codegen->FinalizeFunction(fn); -} - -// Codegen for PartitionedHashTableCtx::equals. For a hash table with two exprs (string,int), -// the IR looks like: -// -// define i1 @equals(%"class.impala::PartitionedHashTableCtx"* %this_ptr, -// %"class.impala::TupleRow"* %row) { -// entry: -// %result = call i64 @GetSlotRef(%"class.impala::ExprContext"* inttoptr -// (i64 146381856 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %0 = trunc i64 %result to i1 -// br i1 %0, label %null, label %not_null -// -// false_block: ; preds = %not_null2, %null1, %not_null, %null -// ret i1 false -// -// null: ; preds = %entry -// br i1 false, label %continue, label %false_block -// -// not_null: ; preds = %entry -// %1 = load i32* inttoptr (i64 104774368 to i32*) -// %2 = ashr i64 %result, 32 -// %3 = trunc i64 %2 to i32 -// %cmp_raw = icmp eq i32 %3, %1 -// br i1 %cmp_raw, label %continue, label %false_block -// -// continue: ; preds = %not_null, %null -// %result4 = call { i64, i8* } @GetSlotRef1( -// %"class.impala::ExprContext"* inttoptr -// (i64 146381696 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %4 = extractvalue { i64, i8* } %result4, 0 -// %5 = trunc i64 %4 to i1 -// br i1 %5, label %null1, label %not_null2 -// -// null1: ; preds = %continue -// br i1 false, label %continue3, label %false_block -// -// not_null2: ; preds = %continue -// %6 = extractvalue { i64, i8* } %result4, 0 -// %7 = ashr i64 %6, 32 -// %8 = trunc i64 %7 to i32 -// %result5 = extractvalue { i64, i8* } %result4, 1 -// %cmp_raw6 = call i1 @_Z11StringValEQPciPKN6impala11StringValueE( -// i8* %result5, i32 %8, %"struct.impala::StringValue"* inttoptr -// (i64 104774384 to %"struct.impala::StringValue"*)) -// br i1 %cmp_raw6, label %continue3, label %false_block -// -// continue3: ; preds = %not_null2, %null1 -// ret i1 true -// } -Function* PartitionedHashTableCtx::codegen_equals(RuntimeState* state) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) return NULL; - } - - LlvmCodeGen* codegen; - if (!state->get_codegen(&codegen).ok()) return NULL; - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(PartitionedHashTableCtx::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* row = args[1]; - - BasicBlock* false_block = BasicBlock::Create(context, "false_block", fn); - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // call get_value on build_exprs[i] - Function* expr_fn; - Status status = _build_expr_ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - // VLOG_QUERY << "Problem with codegen_equals: " << status.GetDetail(); - VLOG_QUERY << "Problem with codegen_equals: " << status.get_error_msg(); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), _build_expr_ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped(codegen, &builder, - _build_expr_ctxs[i]->root()->type(), expr_fn, expr_fn_args, "result"); - Value* is_null = result.get_is_null(); - - // Determine if probe is null (i.e. _expr_value_null_bits[i] == true). In - // the case where the hash table does not store nulls, this is always false. - Value* probe_is_null = codegen->false_value(); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - if (_stores_nulls) { - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - probe_is_null = builder.CreateICmpNE(null_byte, - codegen->get_int_constant(TYPE_TINYINT, 0)); - } - - // Get llvm value for probe_val from '_expr_values_buffer' - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* probe_val = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(_build_expr_ctxs[i]->root()->type()), loc); - - // Branch for get_value() returning NULL - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - builder.CreateCondBr(probe_is_null, continue_block, false_block); - - // Not-null block - builder.SetInsertPoint(not_null_block); - if (_stores_nulls) { - BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", fn); - // First need to compare that probe expr[i] is not null - builder.CreateCondBr(probe_is_null, false_block, cmp_block); - builder.SetInsertPoint(cmp_block); - } - // Check result == probe_val - Value* is_equal = result.EqToNativePtr(probe_val); - builder.CreateCondBr(is_equal, continue_block, false_block); - - builder.SetInsertPoint(continue_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - - return codegen->FinalizeFunction(fn); -} -#endif - } // namespace doris diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h index 3c5b142418..94f6c9847f 100644 --- a/be/src/exec/partitioned_hash_table.h +++ b/be/src/exec/partitioned_hash_table.h @@ -33,15 +33,10 @@ #include "util/hash_util.hpp" #include "util/bit_util.h" -namespace llvm { - class Function; -} - namespace doris { class Expr; class ExprContext; -class LlvmCodeGen; class MemTracker; class MemTracker; class RowDescriptor; @@ -156,24 +151,6 @@ public: int results_buffer_size() const { return _results_buffer_size; } - // Codegen for evaluating a tuple row. Codegen'd function matches the signature - // for EvalBuildRow and EvalTupleRow. - // If build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs. - llvm::Function* codegen_eval_row(RuntimeState* state, bool build_row); - - // Codegen for evaluating a TupleRow and comparing equality against - // '_expr_values_buffer'. Function signature matches PartitionedHashTable::equals(). - llvm::Function* codegen_equals(RuntimeState* state); - - // Codegen for hashing the expr values in '_expr_values_buffer'. Function prototype - // matches HashCurrentRow identically. Unlike HashCurrentRow(), the returned function - // only uses a single hash function, rather than switching based on _level. - // If 'use_murmur' is true, murmur hash is used, otherwise CRC is used if the hardware - // supports it (see hash-util.h). - llvm::Function* codegen_hash_current_row(RuntimeState* state, bool use_murmur); - - static const char* _s_llvm_class_name; - private: friend class PartitionedHashTable; friend class PartitionedHashTableTest_HashEmpty_Test; diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp index e1820b299c..e83346904e 100644 --- a/be/src/exec/topn_node.cpp +++ b/be/src/exec/topn_node.cpp @@ -70,13 +70,6 @@ Status TopNNode::prepare(RuntimeState* state) { _tuple_row_less_than.reset( new TupleRowComparator(_sort_exec_exprs, _is_asc_order, _nulls_first)); - if (state->codegen_level() > 0) { - bool success = _tuple_row_less_than->codegen(state); - if (success) { - // AddRuntimeExecOption("Codegen Enabled"); - } - } - _abort_on_default_limit_exceeded = _abort_on_default_limit_exceeded && state->abort_on_default_limit_exceeded(); _materialized_tuple_desc = _row_descriptor.tuple_descriptors()[0]; diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index 2b57e8c0da..86ff768be3 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -17,7 +17,6 @@ #include "exec/union_node.h" -#include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "runtime/row_batch.h" @@ -30,8 +29,6 @@ // #include "common/names.h" -using namespace llvm; - namespace doris { UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, @@ -96,52 +93,6 @@ Status UnionNode::prepare(RuntimeState* state) { return Status::OK(); } -void UnionNode::codegen(RuntimeState* state) { -#if 0 - DCHECK(state->ShouldCodegen()); - ExecNode::codegen(state); - if (IsNodeCodegenDisabled()) return; - - LlvmCodeGen* codegen = state->codegen(); - DCHECK(codegen != nullptr); - std::stringstream codegen_message; - Status codegen_status; - for (int i = 0; i < _child_expr_lists.size(); ++i) { - if (is_child_passthrough(i)) continue; - - llvm::Function* tuple_materialize_exprs_fn; - codegen_status = Tuple::CodegenMaterializeExprs(codegen, false, *_tuple_desc, - _child_expr_lists[i], true, &tuple_materialize_exprs_fn); - if (!codegen_status.ok()) { - // Codegen may fail in some corner cases (e.g. we don't handle TYPE_CHAR). If this - // happens, abort codegen for this and the remaining children. - codegen_message << "Codegen failed for child: " << _children[i]->id(); - break; - } - - // Get a copy of the function. This function will be modified and added to the - // vector of functions. - Function* union_materialize_batch_fn = - codegen->GetFunction(IRFunction::UNION_MATERIALIZE_BATCH, true); - DCHECK(union_materialize_batch_fn != nullptr); - - int replaced = codegen->ReplaceCallSites(union_materialize_batch_fn, - tuple_materialize_exprs_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL); - DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(union_materialize_batch_fn); - - union_materialize_batch_fn = codegen->FinalizeFunction( - union_materialize_batch_fn); - DCHECK(union_materialize_batch_fn != nullptr); - - // Add the function to Jit and to the vector of codegened functions. - codegen->AddFunctionToJit(union_materialize_batch_fn, - reinterpret_cast(&(_codegend_union_materialize_batch_fns.data()[i]))); - } - runtime_profile()->AddCodegenMsg( - codegen_status.ok(), codegen_status, codegen_message.str()); -#endif -} - Status UnionNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::open(state)); diff --git a/be/src/exec/union_node.h b/be/src/exec/union_node.h index b12fb597a1..70f65d1e05 100644 --- a/be/src/exec/union_node.h +++ b/be/src/exec/union_node.h @@ -45,7 +45,6 @@ public: virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); virtual Status prepare(RuntimeState* state); - virtual void codegen(RuntimeState* state); virtual Status open(RuntimeState* state); virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); // virtual Status reset(RuntimeState* state);