diff --git a/be/src/exec/aggregation_node.cpp b/be/src/exec/aggregation_node.cpp index 1b3ff19147..1f50f46378 100644 --- a/be/src/exec/aggregation_node.cpp +++ b/be/src/exec/aggregation_node.cpp @@ -24,7 +24,6 @@ #include #include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exec/hash_table.hpp" #include "exprs/agg_fn_evaluator.h" #include "exprs/expr.h" @@ -41,17 +40,8 @@ #include "runtime/tuple_row.h" #include "util/runtime_profile.h" -using llvm::BasicBlock; -using llvm::Function; -using llvm::PointerType; -using llvm::Type; -using llvm::Value; -using llvm::StructType; - namespace doris { -const char* AggregationNode::_s_llvm_class_name = "class.doris::AggregationNode"; - // TODO: pass in maximum size; enforce by setting limit in mempool // TODO: have a Status ExecNode::init(const TPlanNode&) member function // that does initialization outside of c'tor, so we can indicate errors @@ -65,7 +55,6 @@ AggregationNode::AggregationNode( _singleton_output_tuple(NULL), //_tuple_pool(new MemPool()), // - _codegen_process_row_batch_fn(NULL), _process_row_batch_fn(NULL), _needs_finalize(tnode.agg_node.need_finalize), _build_timer(NULL), @@ -156,22 +145,6 @@ Status AggregationNode::prepare(RuntimeState* state) { _singleton_output_tuple = construct_intermediate_tuple(); } - if (state->codegen_level() > 0) { - LlvmCodeGen* codegen = NULL; - RETURN_IF_ERROR(state->get_codegen(&codegen)); - Function* update_tuple_fn = codegen_update_tuple(state); - if (update_tuple_fn != NULL) { - _codegen_process_row_batch_fn = - codegen_process_row_batch(state, update_tuple_fn); - if (_codegen_process_row_batch_fn != NULL) { - // Update to using codegen'd process row batch. - codegen->add_function_to_jit(_codegen_process_row_batch_fn, - reinterpret_cast(&_process_row_batch_fn)); - // AddRuntimeExecOption("Codegen Enabled"); - } - } - } - return Status::OK(); } @@ -481,539 +454,5 @@ void AggregationNode::push_down_predicate(RuntimeState *state, return; } -static IRFunction::Type get_hll_update_function2(const TypeDescriptor& type) { - switch (type.type) { - case TYPE_BOOLEAN: - return IRFunction::HLL_UPDATE_BOOLEAN; - case TYPE_TINYINT: - return IRFunction::HLL_UPDATE_TINYINT; - case TYPE_SMALLINT: - return IRFunction::HLL_UPDATE_SMALLINT; - case TYPE_INT: - return IRFunction::HLL_UPDATE_INT; - case TYPE_BIGINT: - return IRFunction::HLL_UPDATE_BIGINT; - case TYPE_FLOAT: - return IRFunction::HLL_UPDATE_FLOAT; - case TYPE_DOUBLE: - return IRFunction::HLL_UPDATE_DOUBLE; - case TYPE_CHAR: - case TYPE_VARCHAR: - return IRFunction::HLL_UPDATE_STRING; - case TYPE_DECIMAL: - return IRFunction::HLL_UPDATE_DECIMAL; - default: - DCHECK(false) << "Unsupported type: " << type; - return IRFunction::FN_END; - } -} - -// IR Generation for updating a single aggregation slot. Signature is: -// void update_slot(FunctionContext* fn_ctx, AggTuple* agg_tuple, char** row) -// -// The IR for sum(double_col) is: -// define void @update_slot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, double }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 128241264 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %dst_val = load double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 -// %1 = fadd double %dst_val, %val -// store double %1, double* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -// -// The IR for min(double_col) is: -// define void @update_slot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, double }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 128241264 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_is_null = call i8 @is_null(tuple); -// br i1 %dst_is_null, label dst_null, label dst_not_null -// -// dst_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %val = extractvalue { i8, double } %src, 1 -// store double %val, double* %dst_slot_ptr -// br label %ret -// -// dst_not_null: ; preds = %src_not_null -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %dst_val = load double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 -// %1 = fadd double %dst_val, %val -// store double %1, double* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -// The IR for ndv(double_col) is: -// define void @update_slot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, %"struct.doris::StringValue" }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %dst_lowered_ptr = alloca { i64, i8* } -// %src_lowered_ptr = alloca { i8, double } -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 120530832 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds -// { i8, %"struct.doris::StringValue" }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, %"struct.doris::StringValue" }* %agg_tuple) -// %dst_val = load %"struct.doris::StringValue"* %dst_slot_ptr -// store { i8, double } %src, { i8, double }* %src_lowered_ptr -// %src_unlowered_ptr = bitcast { i8, double }* %src_lowered_ptr -// to %"struct.doris_udf::DoubleVal"* -// %ptr = extractvalue %"struct.doris::StringValue" %dst_val, 0 -// %dst_stringval = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1 -// %len = extractvalue %"struct.doris::StringValue" %dst_val, 1 -// %1 = extractvalue { i64, i8* } %dst_stringval, 0 -// %2 = zext i32 %len to i64 -// %3 = shl i64 %2, 32 -// %4 = and i64 %1, 4294967295 -// %5 = or i64 %4, %3 -// %dst_stringval1 = insertvalue { i64, i8* } %dst_stringval, i64 %5, 0 -// store { i64, i8* } %dst_stringval1, { i64, i8* }* %dst_lowered_ptr -// %dst_unlowered_ptr = bitcast { i64, i8* }* %dst_lowered_ptr -// to %"struct.doris_udf::StringVal"* -// call void @HllUpdate(%"class.doris_udf::FunctionContext"* %fn_ctx, -// %"struct.doris_udf::DoubleVal"* %src_unlowered_ptr, -// %"struct.doris_udf::StringVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i8* }* %dst_lowered_ptr -// %6 = extractvalue { i64, i8* } %anyval_result, 1 -// %7 = insertvalue %"struct.doris::StringValue" zeroinitializer, i8* %6, 0 -// %8 = extractvalue { i64, i8* } %anyval_result, 0 -// %9 = ashr i64 %8, 32 -// %10 = trunc i64 %9 to i32 -// %11 = insertvalue %"struct.doris::StringValue" %7, i32 %10, 1 -// store %"struct.doris::StringValue" %11, %"struct.doris::StringValue"* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -llvm::Function* AggregationNode::codegen_update_slot( - RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc) { - DCHECK(slot_desc->is_materialized()); - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1); - ExprContext* input_expr_ctx = evaluator->input_expr_ctxs()[0]; - Expr* input_expr = input_expr_ctx->root(); - // TODO: implement timestamp - if (input_expr->type().type == TYPE_DATETIME - || input_expr->type().type == TYPE_DATE - || input_expr->type().type == TYPE_DECIMAL - || input_expr->type().is_string_type()) { - return NULL; - } - Function* agg_expr_fn = NULL; - Status status = input_expr->get_codegend_compute_fn(state, &agg_expr_fn); - if (!status.ok()) { - LOG(INFO) << "Could not codegen update_slot(): " << status.get_error_msg(); - return NULL; - } - DCHECK(agg_expr_fn != NULL); - - PointerType* fn_ctx_type = - codegen->get_ptr_type(FunctionContextImpl::_s_llvm_functioncontext_name); - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - PointerType* tuple_ptr_type = PointerType::get(tuple_struct, 0); - PointerType* tuple_row_ptr_type = codegen->get_ptr_type(TupleRow::_s_llvm_class_name); - - // Create update_slot prototype - LlvmCodeGen::FnPrototype prototype(codegen, "update_slot", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - Value* fn_ctx_arg = args[0]; - Value* agg_tuple_arg = args[1]; - Value* row_arg = args[2]; - - BasicBlock* src_not_null_block = NULL; - BasicBlock* dst_null_block = NULL; - BasicBlock* dst_not_null_block = NULL; - if (evaluator->agg_op() == AggFnEvaluator::MIN - || evaluator->agg_op() == AggFnEvaluator::MAX) { - src_not_null_block = BasicBlock::Create(codegen->context(), "src_not_null", fn); - dst_null_block = BasicBlock::Create(codegen->context(), "dst_null", fn); - } - dst_not_null_block = BasicBlock::Create(codegen->context(), "dst_not_null", fn); - BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", fn); - - // Call expr function to get src slot value - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), input_expr_ctx); - Value* agg_expr_fn_args[] = { ctx_arg, row_arg }; - CodegenAnyVal src = CodegenAnyVal::create_call_wrapped( - codegen, &builder, input_expr->type(), agg_expr_fn, agg_expr_fn_args, "src", NULL); - - Value* src_is_null = src.get_is_null(); - if (evaluator->agg_op() == AggFnEvaluator::MIN - || evaluator->agg_op() == AggFnEvaluator::MAX) { - builder.CreateCondBr(src_is_null, ret_block, src_not_null_block); - - // Src slot is not null - builder.SetInsertPoint(src_not_null_block); - Function* is_null_fn = slot_desc->codegen_is_null(codegen, tuple_struct); - Value* dst_is_null = builder.CreateCall(is_null_fn, agg_tuple_arg); - builder.CreateCondBr(dst_is_null, dst_null_block, dst_not_null_block); - // dst slot is null - builder.SetInsertPoint(dst_null_block); - Value* dst_ptr = - builder.CreateStructGEP(agg_tuple_arg, slot_desc->field_idx(), "dst_slot_ptr"); - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->codegen_update_null(codegen, tuple_struct, false); - builder.CreateCall(clear_null_fn, agg_tuple_arg); - } - builder.CreateStore(src.get_val(), dst_ptr); - builder.CreateBr(ret_block); - } else { - builder.CreateCondBr(src_is_null, ret_block, dst_not_null_block); - } - - - // Src slot is not null, update dst_slot - builder.SetInsertPoint(dst_not_null_block); - Value* dst_ptr = - builder.CreateStructGEP(agg_tuple_arg, slot_desc->field_idx(), "dst_slot_ptr"); - Value* result = NULL; - - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->codegen_update_null(codegen, tuple_struct, false); - builder.CreateCall(clear_null_fn, agg_tuple_arg); - } - - // Update the slot - Value* dst_value = builder.CreateLoad(dst_ptr, "dst_val"); - switch (evaluator->agg_op()) { - case AggFnEvaluator::COUNT: - if (evaluator->is_merge()) { - result = builder.CreateAdd(dst_value, src.get_val(), "count_sum"); - } else { - result = builder.CreateAdd( - dst_value, codegen->get_int_constant(TYPE_BIGINT, 1), "count_inc"); - } - break; - case AggFnEvaluator::MIN: { - Function* min_fn = codegen->codegen_min_max(slot_desc->type(), true); - Value* min_args[] = { dst_value, src.get_val() }; - result = builder.CreateCall(min_fn, min_args, "min_value"); - break; - } - case AggFnEvaluator::MAX: { - Function* max_fn = codegen->codegen_min_max(slot_desc->type(), false); - Value* max_args[] = { dst_value, src.get_val() }; - result = builder.CreateCall(max_fn, max_args, "max_value"); - break; - } - case AggFnEvaluator::SUM: - if (slot_desc->type().type == TYPE_FLOAT || slot_desc->type().type == TYPE_DOUBLE) { - result = builder.CreateFAdd(dst_value, src.get_val()); - } else { - result = builder.CreateAdd(dst_value, src.get_val()); - } - break; - case AggFnEvaluator::NDV: { - DCHECK_EQ(slot_desc->type().type, TYPE_VARCHAR); - IRFunction::Type ir_function_type = evaluator->is_merge() ? IRFunction::HLL_MERGE - : get_hll_update_function2(input_expr->type()); - Function* hll_fn = codegen->get_function(ir_function_type); - - // Create pointer to src_anyval to pass to HllUpdate() function. We must use the - // unlowered type. - Value* src_lowered_ptr = codegen->create_entry_block_alloca( - fn, LlvmCodeGen::NamedVariable("src_lowered_ptr", src.value()->getType())); - builder.CreateStore(src.value(), src_lowered_ptr); - Type* unlowered_ptr_type = - CodegenAnyVal::get_unlowered_type(codegen, input_expr->type())->getPointerTo(); - Value* src_unlowered_ptr = - builder.CreateBitCast(src_lowered_ptr, unlowered_ptr_type, "src_unlowered_ptr"); - - // Create StringVal* intermediate argument from dst_value - CodegenAnyVal dst_stringval = CodegenAnyVal::get_non_null_val( - codegen, &builder, TypeDescriptor(TYPE_VARCHAR), "dst_stringval"); - dst_stringval.set_from_raw_value(dst_value); - // Create pointer to dst_stringval to pass to HllUpdate() function. We must use - // the unlowered type. - Value* dst_lowered_ptr = codegen->create_entry_block_alloca( - fn, LlvmCodeGen::NamedVariable("dst_lowered_ptr", - dst_stringval.value()->getType())); - builder.CreateStore(dst_stringval.value(), dst_lowered_ptr); - unlowered_ptr_type = - codegen->get_ptr_type(CodegenAnyVal::get_unlowered_type( - codegen, TypeDescriptor(TYPE_VARCHAR))); - Value* dst_unlowered_ptr = - builder.CreateBitCast(dst_lowered_ptr, unlowered_ptr_type, "dst_unlowered_ptr"); - - // Call 'hll_fn' - builder.CreateCall3(hll_fn, fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr); - - // Convert StringVal intermediate 'dst_arg' back to StringValue - Value* anyval_result = builder.CreateLoad(dst_lowered_ptr, "anyval_result"); - result = CodegenAnyVal(codegen, &builder, TypeDescriptor(TYPE_VARCHAR), anyval_result) - .to_native_value(); - break; - } - default: - DCHECK(false) << "bad aggregate operator: " << evaluator->agg_op(); - } - - builder.CreateStore(result, dst_ptr); - builder.CreateBr(ret_block); - - builder.SetInsertPoint(ret_block); - builder.CreateRetVoid(); - - fn = codegen->finalize_function(fn); - return fn; -} - -// IR codegen for the update_tuple loop. This loop is query specific and -// based on the aggregate functions. The function signature must match the non- -// codegen'd update_tuple exactly. -// For the query: -// select count(*), count(int_col), sum(double_col) the IR looks like: -// -// define void @update_tuple(%"class.doris::AggregationNode"* %this_ptr, -// %"class.doris::Tuple"* %agg_tuple, -// %"class.doris::TupleRow"* %tuple_row) #20 { -// entry: -// %tuple = bitcast %"class.doris::Tuple"* %agg_tuple to { i8, i64, i64, double }* -// %src_slot = getelementptr inbounds { i8, i64, i64, double }* %tuple, i32 0, i32 1 -// %count_star_val = load i64* %src_slot -// %count_star_inc = add i64 %count_star_val, 1 -// store i64 %count_star_inc, i64* %src_slot -// call void @update_slot(%"class.doris_udf::FunctionContext"* inttoptr -// (i64 44521296 to %"class.doris_udf::FunctionContext"*), -// { i8, i64, i64, double }* %tuple, -// %"class.doris::TupleRow"* %tuple_row) -// call void @UpdateSlot5(%"class.doris_udf::FunctionContext"* inttoptr -// (i64 44521328 to %"class.doris_udf::FunctionContext"*), -// { i8, i64, i64, double }* %tuple, -// %"class.doris::TupleRow"* %tuple_row) -// ret void -// } -Function* AggregationNode::codegen_update_tuple(RuntimeState* state) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - - int j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - - // Timestamp and char are never supported. NDV supports decimal and string but no - // other functions. - // TODO: the other aggregate functions might work with decimal as-is - // TODO(zc) - if (slot_desc->type().type == TYPE_DATETIME || slot_desc->type().type == TYPE_CHAR || - (evaluator->agg_op() != AggFnEvaluator::NDV && - (slot_desc->type().type == TYPE_DECIMAL || - slot_desc->type().type == TYPE_CHAR || - slot_desc->type().type == TYPE_VARCHAR))) { - LOG(INFO) << "Could not codegen UpdateIntermediateTuple because " - << "string, char, timestamp and decimal are not yet supported."; - return NULL; - } - if (evaluator->agg_op() == AggFnEvaluator::COUNT_DISTINCT - || evaluator->agg_op() == AggFnEvaluator::SUM_DISTINCT) { - return NULL; - } - - // Don't codegen things that aren't builtins (for now) - if (!evaluator->is_builtin()) { - return NULL; - } - } - - if (_intermediate_tuple_desc->generate_llvm_struct(codegen) == NULL) { - LOG(INFO) << "Could not codegen update_tuple because we could" - << "not generate a matching llvm struct for the intermediate tuple."; - return NULL; - } - - // Get the types to match the update_tuple signature - Type* agg_node_type = codegen->get_type(AggregationNode::_s_llvm_class_name); - Type* agg_tuple_type = codegen->get_type(Tuple::_s_llvm_class_name); - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - - DCHECK(agg_node_type != NULL); - DCHECK(agg_tuple_type != NULL); - DCHECK(tuple_row_type != NULL); - - PointerType* agg_node_ptr_type = PointerType::get(agg_node_type, 0); - PointerType* agg_tuple_ptr_type = PointerType::get(agg_tuple_type, 0); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - // Signature for update_tuple is - // void update_tuple(AggregationNode* this, Tuple* tuple, TupleRow* row) - // This signature needs to match the non-codegen'd signature exactly. - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - PointerType* tuple_ptr = PointerType::get(tuple_struct, 0); - LlvmCodeGen::FnPrototype prototype(codegen, "update_tuple", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_tuple", agg_tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("tuple_row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - - // Cast the parameter types to the internal llvm runtime types. - // TODO: get rid of this by using right type in function signature - args[1] = builder.CreateBitCast(args[1], tuple_ptr, "tuple"); - - // Loop over each expr and generate the IR for that slot. If the expr is not - // count(*), generate a helper IR function to update the slot and call that. - j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - if (evaluator->is_count_star()) { - // TODO: we should be able to hoist this up to the loop over the batch and just - // increment the slot by the number of rows in the batch. - int field_idx = slot_desc->field_idx(); - Value* const_one = codegen->get_int_constant(TYPE_BIGINT, 1); - Value* slot_ptr = builder.CreateStructGEP(args[1], field_idx, "src_slot"); - Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val"); - Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc"); - builder.CreateStore(count_inc, slot_ptr); - } else { - Function* update_slot_fn = codegen_update_slot(state, evaluator, slot_desc); - if (update_slot_fn == NULL) { - return NULL; - } - Value* fn_ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(FunctionContextImpl::_s_llvm_functioncontext_name), - _agg_fn_ctxs[i]); - builder.CreateCall3(update_slot_fn, fn_ctx_arg, args[1], args[2]); - } - } - builder.CreateRetVoid(); - - // CodegenProcessRowBatch() does the final optimizations. - return codegen->finalize_function(fn); -} - -Function* AggregationNode::codegen_process_row_batch( - RuntimeState* state, Function* update_tuple_fn) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - DCHECK(update_tuple_fn != NULL); - - // Get the cross compiled update row batch function - IRFunction::Type ir_fn = - (!_probe_expr_ctxs.empty() ? IRFunction::AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING - : IRFunction::AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING); - Function* process_batch_fn = codegen->get_function(ir_fn); - if (process_batch_fn == NULL) { - LOG(ERROR) << "Could not find AggregationNode::ProcessRowBatch in module."; - return NULL; - } - - int replaced = 0; - if (!_probe_expr_ctxs.empty()) { - // Aggregation w/o grouping does not use a hash table. - - // Codegen for hash - Function* hash_fn = _hash_tbl->codegen_hash_current_row(state); - if (hash_fn == NULL) { - return NULL; - } - - // Codegen HashTable::Equals - Function* equals_fn = _hash_tbl->codegen_equals(state); - if (equals_fn == NULL) { - return NULL; - } - - // Codegen for evaluating build rows - Function* eval_build_row_fn = _hash_tbl->codegen_eval_tuple_row(state, true); - if (eval_build_row_fn == NULL) { - return NULL; - } - - // Codegen for evaluating probe rows - Function* eval_probe_row_fn = _hash_tbl->codegen_eval_tuple_row(state, false); - if (eval_probe_row_fn == NULL) { - return NULL; - } - - // Replace call sites - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, eval_build_row_fn, "eval_build_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, eval_probe_row_fn, "eval_probe_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, hash_fn, "hash_current_row", &replaced); - DCHECK_EQ(replaced, 2); - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, equals_fn, "equals", &replaced); - DCHECK_EQ(replaced, 1); - } - - process_batch_fn = codegen->replace_call_sites( - process_batch_fn, false, update_tuple_fn, "update_tuple", &replaced); - DCHECK_EQ(replaced, 1) << "One call site should be replaced."; - DCHECK(process_batch_fn != NULL); - return codegen->optimize_function_with_exprs(process_batch_fn); -} } diff --git a/be/src/exec/aggregation_node.h b/be/src/exec/aggregation_node.h index 9100739221..72e9f0ad0b 100644 --- a/be/src/exec/aggregation_node.h +++ b/be/src/exec/aggregation_node.h @@ -28,14 +28,9 @@ #include "runtime/mem_pool.h" #include "runtime/string_value.h" -namespace llvm { -class Function; -} - namespace doris { class AggFnEvaluator; -class LlvmCodeGen; class RowBatch; class RuntimeState; struct StringValue; @@ -69,7 +64,6 @@ public: virtual void push_down_predicate( RuntimeState *state, std::list *expr_ctxs); - static const char* _s_llvm_class_name; private: boost::scoped_ptr _hash_tbl; HashTable::Iterator _output_iterator; @@ -98,9 +92,6 @@ private: Tuple* _singleton_output_tuple; // result of aggregation w/o GROUP BY boost::scoped_ptr _tuple_pool; - /// IR for process row batch. NULL if codegen is disabled. - llvm::Function* _codegen_process_row_batch_fn; - typedef void (*ProcessRowBatchFn)(AggregationNode*, RowBatch*); // Jitted ProcessRowBatch function pointer. Null if codegen is disabled. ProcessRowBatchFn _process_row_batch_fn; @@ -136,21 +127,6 @@ private: // Do the aggregation for all tuple rows in the batch void process_row_batch_no_grouping(RowBatch* batch, MemPool* pool); void process_row_batch_with_grouping(RowBatch* batch, MemPool* pool); - - /// Codegen the process row batch loop. The loop has already been compiled to - /// IR and loaded into the codegen object. UpdateAggTuple has also been - /// codegen'd to IR. This function will modify the loop subsituting the - /// UpdateAggTuple function call with the (inlined) codegen'd 'update_tuple_fn'. - llvm::Function* codegen_process_row_batch( - RuntimeState* state, llvm::Function* update_tuple_fn); - - /// Codegen for updating aggregate_exprs at slot_idx. Returns NULL if unsuccessful. - /// slot_idx is the idx into aggregate_exprs_ (does not include grouping exprs). - llvm::Function* codegen_update_slot( - RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc); - - /// Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful. - llvm::Function* codegen_update_tuple(RuntimeState* state); }; } diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index ed626966ce..e174973f1c 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -27,8 +27,6 @@ namespace doris { -const char* BlockingJoinNode::LLVM_CLASS_NAME = "class.doris::BlockingJoinNode"; - BlockingJoinNode::BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, ObjectPool* pool, diff --git a/be/src/exec/blocking_join_node.h b/be/src/exec/blocking_join_node.h index e1595c505d..964f996203 100644 --- a/be/src/exec/blocking_join_node.h +++ b/be/src/exec/blocking_join_node.h @@ -57,8 +57,6 @@ public: // BlockingJoinNode::close(). virtual Status close(RuntimeState* state); - static const char* LLVM_CLASS_NAME; - private: const std::string _node_name; TJoinOp::type _join_op; diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 6a75ce2af5..d8bd463216 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -21,7 +21,6 @@ #include #include -#include "codegen/llvm_codegen.h" #include "codegen/codegen_anyval.h" #include "common/object_pool.h" #include "common/status.h" @@ -62,13 +61,6 @@ #include "util/debug_util.h" #include "util/runtime_profile.h" -using llvm::Function; -using llvm::PointerType; -using llvm::Type; -using llvm::Value; -using llvm::LLVMContext; -using llvm::BasicBlock; - namespace doris { const std::string ExecNode::ROW_THROUGHPUT_COUNTER = "RowsReturnedRate"; @@ -586,118 +578,6 @@ Status ExecNode::exec_debug_action(TExecNodePhase::type phase) { return Status::OK(); } -// Codegen for EvalConjuncts. The generated signature is -// For a node with two conjunct predicates -// define i1 @EvalConjuncts(%"class.impala::ExprContext"** %ctxs, i32 %num_ctxs, -// %"class.impala::TupleRow"* %row) #20 { -// entry: -// %ctx_ptr = getelementptr %"class.impala::ExprContext"** %ctxs, i32 0 -// %ctx = load %"class.impala::ExprContext"** %ctx_ptr -// %result = call i16 @Eq_StringVal_StringValWrapper3( -// %"class.impala::ExprContext"* %ctx, %"class.impala::TupleRow"* %row) -// %is_null = trunc i16 %result to i1 -// %0 = ashr i16 %result, 8 -// %1 = trunc i16 %0 to i8 -// %val = trunc i8 %1 to i1 -// %is_false = xor i1 %val, true -// %return_false = or i1 %is_null, %is_false -// br i1 %return_false, label %false, label %continue -// -// continue: ; preds = %entry -// %ctx_ptr2 = getelementptr %"class.impala::ExprContext"** %ctxs, i32 1 -// %ctx3 = load %"class.impala::ExprContext"** %ctx_ptr2 -// %result4 = call i16 @Gt_BigIntVal_BigIntValWrapper5( -// %"class.impala::ExprContext"* %ctx3, %"class.impala::TupleRow"* %row) -// %is_null5 = trunc i16 %result4 to i1 -// %2 = ashr i16 %result4, 8 -// %3 = trunc i16 %2 to i8 -// %val6 = trunc i8 %3 to i1 -// %is_false7 = xor i1 %val6, true -// %return_false8 = or i1 %is_null5, %is_false7 -// br i1 %return_false8, label %false, label %continue1 -// -// continue1: ; preds = %continue -// ret i1 true -// -// false: ; preds = %continue, %entry -// ret i1 false -// } -Function* ExecNode::codegen_eval_conjuncts( - RuntimeState* state, const std::vector& conjunct_ctxs, const char* name) { - Function* conjunct_fns[conjunct_ctxs.size()]; - for (int i = 0; i < conjunct_ctxs.size(); ++i) { - Status status = - conjunct_ctxs[i]->root()->get_codegend_compute_fn(state, &conjunct_fns[i]); - if (!status.ok()) { - VLOG_QUERY << "Could not codegen EvalConjuncts: " << status.get_error_msg(); - return NULL; - } - } - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Construct function signature to match - // bool EvalConjuncts(Expr** exprs, int num_exprs, TupleRow* row) - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - Type* expr_ctx_type = codegen->get_type(ExprContext::_s_llvm_class_name); - - DCHECK(tuple_row_type != NULL); - DCHECK(expr_ctx_type != NULL); - - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - PointerType* expr_ctx_ptr_type = PointerType::get(expr_ctx_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, name, codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument( - LlvmCodeGen::NamedVariable("ctxs", PointerType::get(expr_ctx_ptr_type, 0))); - prototype.add_argument( - LlvmCodeGen::NamedVariable("num_ctxs", codegen->get_type(TYPE_INT))); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* ctxs_arg = args[0]; - Value* tuple_row_arg = args[2]; - - if (conjunct_ctxs.size() > 0) { - LLVMContext& context = codegen->context(); - BasicBlock* false_block = BasicBlock::Create(context, "false", fn); - - for (int i = 0; i < conjunct_ctxs.size(); ++i) { - BasicBlock* true_block = BasicBlock::Create(context, "continue", fn, false_block); - - Value* ctx_arg_ptr = builder.CreateConstGEP1_32(ctxs_arg, i, "ctx_ptr"); - Value* ctx_arg = builder.CreateLoad(ctx_arg_ptr, "ctx"); - Value* expr_args[] = { ctx_arg, tuple_row_arg }; - - // Call conjunct_fns[i] - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, conjunct_ctxs[i]->root()->type(), - conjunct_fns[i], expr_args, "result", NULL); - - // Return false if result.is_null || !result - Value* is_null = result.get_is_null(); - Value* is_false = builder.CreateNot(result.get_val(), "is_false"); - Value* return_false = builder.CreateOr(is_null, is_false, "return_false"); - builder.CreateCondBr(return_false, false_block, true_block); - - // Set insertion point for continue/end - builder.SetInsertPoint(true_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - } else { - builder.CreateRet(codegen->true_value()); - } - - return codegen->finalize_function(fn); -} - Status ExecNode::claim_buffer_reservation(RuntimeState* state) { DCHECK(!_buffer_pool_client.is_registered()); BufferPool* buffer_pool = ExecEnv::GetInstance()->buffer_pool(); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index e40a4d316a..ab51166084 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -33,10 +33,6 @@ #include "service/backend_options.h" #include "util/uid_util.h" // for print_id -namespace llvm { -class Function; -} - namespace doris { class Expr; @@ -134,14 +130,6 @@ public: // each implementation should start out by calling the default implementation. virtual Status close(RuntimeState* state); - llvm::Function* codegen_eval_conjuncts( - RuntimeState* state, const std::vector& conjunct_ctxs, const char* name); - - llvm::Function* codegen_eval_conjuncts( - RuntimeState* state, const std::vector& conjunct_ctxs) { - return codegen_eval_conjuncts(state, conjunct_ctxs, "EvalConjuncts"); - } - // Creates exec node tree from list of nodes contained in plan via depth-first // traversal. All nodes are placed in pool. // Returns error if 'plan' is corrupted, otherwise success. diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 084e8b9b38..c31e2ee462 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -19,7 +19,6 @@ #include -#include "codegen/llvm_codegen.h" #include "exec/hash_table.hpp" #include "exprs/expr.h" #include "exprs/in_predicate.h" @@ -29,21 +28,13 @@ #include "util/runtime_profile.h" #include "gen_cpp/PlanNodes_types.h" -using llvm::Function; -using llvm::PointerType; -using llvm::Type; -using llvm::Value; -using llvm::BasicBlock; -using llvm::LLVMContext; namespace doris { -const char* HashJoinNode::_s_llvm_class_name = "class.doris::HashJoinNode"; HashJoinNode::HashJoinNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), _join_op(tnode.hash_join_node.join_op), _probe_eos(false), - _codegen_process_build_batch_fn(NULL), _process_build_batch_fn(NULL), _process_probe_batch_fn(NULL), _anti_join_last_pos(NULL) { @@ -149,39 +140,6 @@ Status HashJoinNode::prepare(RuntimeState* state) { _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); - if (state->codegen_level() > 0) { - if (_join_op == TJoinOp::LEFT_ANTI_JOIN) { - return Status::OK(); - } - LlvmCodeGen* codegen = NULL; - RETURN_IF_ERROR(state->get_codegen(&codegen)); - - // Codegen for hashing rows - Function* hash_fn = _hash_tbl->codegen_hash_current_row(state); - if (hash_fn == NULL) { - return Status::OK(); - } - - // Codegen for build path - _codegen_process_build_batch_fn = codegen_process_build_batch(state, hash_fn); - if (_codegen_process_build_batch_fn != NULL) { - codegen->add_function_to_jit( - _codegen_process_build_batch_fn, - reinterpret_cast(&_process_build_batch_fn)); - // AddRuntimeExecOption("Build Side Codegen Enabled"); - } - - // Codegen for probe path (only for left joins) - if (!_match_all_build) { - Function* codegen_process_probe_batch_fn = codegen_process_probe_batch(state, hash_fn); - if (codegen_process_probe_batch_fn != NULL) { - codegen->add_function_to_jit(codegen_process_probe_batch_fn, - reinterpret_cast(&_process_probe_batch_fn)); - // AddRuntimeExecOption("Probe Side Codegen Enabled"); - } - } - } - return Status::OK(); } @@ -774,207 +732,4 @@ void HashJoinNode::create_output_row(TupleRow* out, TupleRow* probe, TupleRow* b } } -// This codegen'd function should only be used for left join cases so it assumes that -// the probe row is non-null. For a left outer join, the IR looks like: -// define void @CreateOutputRow(%"class.impala::HashBlockingNode"* %this_ptr, -// %"class.impala::TupleRow"* %out_arg, -// %"class.impala::TupleRow"* %probe_arg, -// %"class.impala::TupleRow"* %build_arg) { -// entry: -// %out = bitcast %"class.impala::TupleRow"* %out_arg to i8** -// %probe = bitcast %"class.impala::TupleRow"* %probe_arg to i8** -// %build = bitcast %"class.impala::TupleRow"* %build_arg to i8** -// %0 = bitcast i8** %out to i8* -// %1 = bitcast i8** %probe to i8* -// call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 16, i1 false) -// %is_build_null = icmp eq i8** %build, null -// br i1 %is_build_null, label %build_null, label %build_not_null -// -// build_not_null: ; preds = %entry -// %dst_tuple_ptr1 = getelementptr i8** %out, i32 1 -// %src_tuple_ptr = getelementptr i8** %build, i32 0 -// %2 = load i8** %src_tuple_ptr -// store i8* %2, i8** %dst_tuple_ptr1 -// ret void -// -// build_null: ; preds = %entry -// %dst_tuple_ptr = getelementptr i8** %out, i32 1 -// call void @llvm.memcpy.p0i8.p0i8.i32( -// i8* %dst_tuple_ptr, i8* %1, i32 16, i32 16, i1 false) -// ret void -// } -Function* HashJoinNode::codegen_create_output_row(LlvmCodeGen* codegen) { - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(HashJoinNode::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - // TupleRows are really just an array of pointers. Easier to work with them - // this way. - PointerType* tuple_row_working_type = PointerType::get(codegen->ptr_type(), 0); - - // Construct function signature to match CreateOutputRow() - LlvmCodeGen::FnPrototype prototype(codegen, "CreateOutputRow", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("out_arg", tuple_row_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("probe_arg", tuple_row_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("build_arg", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[4]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* out_row_arg = builder.CreateBitCast(args[1], tuple_row_working_type, "out"); - Value* probe_row_arg = builder.CreateBitCast(args[2], tuple_row_working_type, "probe"); - Value* build_row_arg = builder.CreateBitCast(args[3], tuple_row_working_type, "build"); - - int num_probe_tuples = child(0)->row_desc().tuple_descriptors().size(); - int num_build_tuples = child(1)->row_desc().tuple_descriptors().size(); - - // Copy probe row - codegen->codegen_memcpy(&builder, out_row_arg, probe_row_arg, _probe_tuple_row_size); - Value* build_row_idx[] = { codegen->get_int_constant(TYPE_INT, num_probe_tuples) }; - Value* build_row_dst = builder.CreateGEP(out_row_arg, build_row_idx, "build_dst_ptr"); - - // Copy build row. - BasicBlock* build_not_null_block = BasicBlock::Create(context, "build_not_null", fn); - BasicBlock* build_null_block = NULL; - - if (_match_all_probe) { - // build tuple can be null - build_null_block = BasicBlock::Create(context, "build_null", fn); - Value* is_build_null = builder.CreateIsNull(build_row_arg, "is_build_null"); - builder.CreateCondBr(is_build_null, build_null_block, build_not_null_block); - - // Set tuple build ptrs to NULL - // TODO: this should be replaced with memset() but I can't get the llvm intrinsic - // to work. - builder.SetInsertPoint(build_null_block); - for (int i = 0; i < num_build_tuples; ++i) { - Value* array_idx[] = - { codegen->get_int_constant(TYPE_INT, i + num_probe_tuples) }; - Value* dst = builder.CreateGEP(out_row_arg, array_idx, "dst_tuple_ptr"); - builder.CreateStore(codegen->null_ptr_value(), dst); - } - builder.CreateRetVoid(); - } else { - // build row can't be NULL - builder.CreateBr(build_not_null_block); - } - - // Copy build tuple ptrs - builder.SetInsertPoint(build_not_null_block); - codegen->codegen_memcpy(&builder, build_row_dst, build_row_arg, _build_tuple_row_size); - builder.CreateRetVoid(); - - return codegen->finalize_function(fn); -} - -Function* HashJoinNode::codegen_process_build_batch(RuntimeState* state, Function* hash_fn) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get cross compiled function - Function* process_build_batch_fn = codegen->get_function( - IRFunction::HASH_JOIN_PROCESS_BUILD_BATCH); - DCHECK(process_build_batch_fn != NULL); - - // Codegen for evaluating build rows - Function* eval_row_fn = _hash_tbl->codegen_eval_tuple_row(state, true); - if (eval_row_fn == NULL) { - return NULL; - } - - int replaced = 0; - // Replace call sites - process_build_batch_fn = codegen->replace_call_sites( - process_build_batch_fn, false, eval_row_fn, "eval_build_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_build_batch_fn = codegen->replace_call_sites( - process_build_batch_fn, false, hash_fn, "hash_current_row", &replaced); - DCHECK_EQ(replaced, 1); - - return codegen->optimize_function_with_exprs(process_build_batch_fn); -} - -Function* HashJoinNode::codegen_process_probe_batch(RuntimeState* state, Function* hash_fn) { - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get cross compiled function - Function* process_probe_batch_fn = - codegen->get_function(IRFunction::HASH_JOIN_PROCESS_PROBE_BATCH); - DCHECK(process_probe_batch_fn != NULL); - - // Codegen HashTable::Equals - Function* equals_fn = _hash_tbl->codegen_equals(state); - if (equals_fn == NULL) { - return NULL; - } - - // Codegen for evaluating build rows - Function* eval_row_fn = _hash_tbl->codegen_eval_tuple_row(state, false); - if (eval_row_fn == NULL) { - return NULL; - } - - // Codegen CreateOutputRow - Function* create_output_row_fn = codegen_create_output_row(codegen); - if (create_output_row_fn == NULL) { - return NULL; - } - - // Codegen evaluating other join conjuncts - Function* eval_other_conjuncts_fn = ExecNode::codegen_eval_conjuncts( - state, _other_join_conjunct_ctxs, "EvalOtherConjuncts"); - if (eval_other_conjuncts_fn == NULL) { - return NULL; - } - - // Codegen evaluating conjuncts - Function* eval_conjuncts_fn = ExecNode::codegen_eval_conjuncts(state, _conjunct_ctxs); - if (eval_conjuncts_fn == NULL) { - return NULL; - } - - // Replace all call sites with codegen version - int replaced = 0; - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, hash_fn, "hash_current_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, eval_row_fn, "eval_probe_row", &replaced); - DCHECK_EQ(replaced, 1); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, create_output_row_fn, "create_output_row", &replaced); - // TODO(zc): add semi join - DCHECK_EQ(replaced, 2); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, eval_conjuncts_fn, "eval_conjuncts", &replaced); - DCHECK_EQ(replaced, 2); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, eval_other_conjuncts_fn, - "eval_other_join_conjuncts", &replaced); - // TODO(zc): add semi join - DCHECK_EQ(replaced, 1); - - process_probe_batch_fn = codegen->replace_call_sites( - process_probe_batch_fn, false, equals_fn, "equals", &replaced); - DCHECK_EQ(replaced, 2); - - return codegen->optimize_function_with_exprs(process_probe_batch_fn); -} - } diff --git a/be/src/exec/hash_join_node.h b/be/src/exec/hash_join_node.h index b00c60881d..f4b703b7e8 100644 --- a/be/src/exec/hash_join_node.h +++ b/be/src/exec/hash_join_node.h @@ -58,8 +58,6 @@ public: virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); virtual Status close(RuntimeState* state); - static const char* _s_llvm_class_name; - protected: void debug_string(int indentation_level, std::stringstream* out) const; @@ -117,9 +115,6 @@ private: // This should be the same size as the probe tuple row. int _result_tuple_row_size; - /// llvm function for build batch - llvm::Function* _codegen_process_build_batch_fn; - // Function declaration for codegen'd function. Signature must match // HashJoinNode::ProcessBuildBatch typedef void (*ProcessBuildBatchFn)(HashJoinNode*, RowBatch*); @@ -178,21 +173,6 @@ private: // This is only used for debugging and outputting the left child rows before // doing the join. std::string get_probe_row_output_string(TupleRow* probe_row); - - /// Codegen function to create output row - llvm::Function* codegen_create_output_row(LlvmCodeGen* codegen); - - /// Codegen processing build batches. Identical signature to ProcessBuildBatch. - /// hash_fn is the codegen'd function for computing hashes over tuple rows in the - /// hash table. - /// Returns NULL if codegen was not possible. - llvm::Function* codegen_process_build_batch(RuntimeState* state, llvm::Function* hash_fn); - - /// Codegen processing probe batches. Identical signature to ProcessProbeBatch. - /// hash_fn is the codegen'd function for computing hashes over tuple rows in the - /// hash table. - /// Returns NULL if codegen was not possible. - llvm::Function* codegen_process_probe_batch(RuntimeState* state, llvm::Function* hash_fn); }; } diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 0c805f7cb8..975c493d6f 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -18,7 +18,6 @@ #include "exec/hash_table.hpp" #include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "runtime/raw_value.h" @@ -27,18 +26,9 @@ #include "runtime/runtime_state.h" #include "util/doris_metrics.h" -using llvm::BasicBlock; -using llvm::Value; -using llvm::Function; -using llvm::Type; -using llvm::PointerType; -using llvm::LLVMContext; -using llvm::PHINode; - namespace doris { const float HashTable::MAX_BUCKET_OCCUPANCY_FRACTION = 0.75f; -const char* HashTable::_s_llvm_class_name = "class.doris::HashTable"; HashTable::HashTable(const vector& build_expr_ctxs, const vector& probe_expr_ctxs, @@ -328,493 +318,4 @@ std::string HashTable::debug_string(bool skip_empty, const RowDescriptor* desc) return ss.str(); } -// Helper function to store a value into the results buffer if the expr -// evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so -// we'll pick a more random value. -static void codegen_assign_null_value( - LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder, - Value* dst, const TypeDescriptor& type) { - int64_t fvn_seed = HashUtil::FNV_SEED; - - if (type.type == TYPE_CHAR || type.type == TYPE_VARCHAR) { - Value* dst_ptr = builder->CreateStructGEP(dst, 0, "string_ptr"); - Value* dst_len = builder->CreateStructGEP(dst, 1, "string_len"); - Value* null_len = codegen->get_int_constant(TYPE_INT, fvn_seed); - Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); - builder->CreateStore(null_ptr, dst_ptr); - builder->CreateStore(null_len, dst_len); - return; - } else { - Value* null_value = NULL; - // Get a type specific representation of fvn_seed - switch (type.type) { - case TYPE_BOOLEAN: - // In results, booleans are stored as 1 byte - dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->get_int_constant(TYPE_TINYINT, fvn_seed); - break; - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - null_value = codegen->get_int_constant(type.type, fvn_seed); - break; - case TYPE_FLOAT: { - // Don't care about the value, just the bit pattern - float fvn_seed_float = *reinterpret_cast(&fvn_seed); - null_value = llvm::ConstantFP::get( - codegen->context(), llvm::APFloat(fvn_seed_float)); - break; - } - case TYPE_DOUBLE: { - // Don't care about the value, just the bit pattern - double fvn_seed_double = *reinterpret_cast(&fvn_seed); - null_value = llvm::ConstantFP::get( - codegen->context(), llvm::APFloat(fvn_seed_double)); - break; - } - default: - DCHECK(false); - } - builder->CreateStore(null_value, dst); - } -} - -// Codegen for evaluating a tuple row over either _build_expr_ctxs or _probe_expr_ctxs. -// For the case where we are joining on a single int, the IR looks like -// define i1 @EvaBuildRow(%"class.impala::HashTable"* %this_ptr, -// %"class.impala::TupleRow"* %row) { -// entry: -// %null_ptr = alloca i1 -// %0 = bitcast %"class.doris::TupleRow"* %row to i8** -// %eval = call i32 @SlotRef(i8** %0, i8* null, i1* %null_ptr) -// %1 = load i1* %null_ptr -// br i1 %1, label %null, label %not_null -// -// null: ; preds = %entry -// ret i1 true -// -// not_null: ; preds = %entry -// store i32 %eval, i32* inttoptr (i64 46146336 to i32*) -// br label %continue -// -// continue: ; preds = %not_null -// %2 = zext i1 %1 to i8 -// store i8 %2, i8* inttoptr (i64 46146248 to i8*) -// ret i1 false -// } -// For each expr, we create 3 code blocks. The null, not null and continue blocks. -// Both the null and not null branch into the continue block. The continue block -// becomes the start of the next block for codegen (either the next expr or just the -// end of the function). -Function* HashTable::codegen_eval_tuple_row(RuntimeState* state, bool build) { - // TODO: codegen_assign_null_value() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet - const std::vector& ctxs = build ? _build_expr_ctxs : _probe_expr_ctxs; - for (int i = 0; i < ctxs.size(); ++i) { - PrimitiveType type = ctxs[i]->root()->type().type; - if (type == TYPE_DATE || type == TYPE_DATETIME - || type == TYPE_DECIMAL || type == TYPE_CHAR || type == TYPE_DECIMALV2) { - return NULL; - } - } - - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(HashTable::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype( - codegen, build ? "eval_build_row" : "eval_probe_row", codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - - Value* row = args[1]; - Value* has_null = codegen->false_value(); - - // Aggregation with no grouping exprs also use the hash table interface for - // code simplicity. In that case, there are no build exprs. - if (!_build_expr_ctxs.empty()) { - const std::vector& ctxs = build ? _build_expr_ctxs : _probe_expr_ctxs; - for (int i = 0; i < ctxs.size(); ++i) { - // TODO: refactor this to somewhere else? This is not hash table specific - // except for the null handling bit and would be used for anyone that needs - // to materialize a vector of exprs - // Convert result buffer to llvm ptr type - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ctxs[i]->root()->type()), loc); - - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // Call expr - Function* expr_fn = NULL; - Status status = ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - std::stringstream ss; - ss << "Problem with codegen: " << status.get_error_msg(); - // TODO(zc ) - // state->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str())); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, ctxs[i]->root()->type(), - expr_fn, expr_fn_args, "result", NULL); - Value* is_null = result.get_is_null(); - - // Set null-byte result - Value* null_byte = builder.CreateZExt(is_null, codegen->get_type(TYPE_TINYINT)); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - builder.CreateStore(null_byte, llvm_null_byte_loc); - - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - if (!_stores_nulls) { - // hash table doesn't store nulls, no reason to keep evaluating exprs - builder.CreateRet(codegen->true_value()); - } else { - codegen_assign_null_value(codegen, &builder, llvm_loc, ctxs[i]->root()->type()); - has_null = codegen->true_value(); - builder.CreateBr(continue_block); - } - - // Not null block - builder.SetInsertPoint(not_null_block); - result.to_native_ptr(llvm_loc); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(continue_block); - } - } - builder.CreateRet(has_null); - - return codegen->finalize_function(fn); -} - -// Codegen for hashing the current row. In the case with both string and non-string data -// (group by int_col, string_col), the IR looks like: -// define i32 @hash_current_row(%"class.impala::HashTable"* %this_ptr) { -// entry: -// %0 = call i32 @IrCrcHash(i8* inttoptr (i64 51107808 to i8*), i32 16, i32 0) -// %1 = load i8* inttoptr (i64 29500112 to i8*) -// %2 = icmp ne i8 %1, 0 -// br i1 %2, label %null, label %not_null -// -// null: ; preds = %entry -// %3 = call i32 @IrCrcHash(i8* inttoptr (i64 51107824 to i8*), i32 16, i32 %0) -// br label %continue -// -// not_null: ; preds = %entry -// %4 = load i8** getelementptr inbounds ( -// %"struct.impala::StringValue"* inttoptr -// (i64 51107824 to %"struct.impala::StringValue"*), i32 0, i32 0) -// %5 = load i32* getelementptr inbounds ( -// %"struct.impala::StringValue"* inttoptr -// (i64 51107824 to %"struct.impala::StringValue"*), i32 0, i32 1) -// %6 = call i32 @IrCrcHash(i8* %4, i32 %5, i32 %0) -// br label %continue -// -// continue: ; preds = %not_null, %null -// %7 = phi i32 [ %6, %not_null ], [ %3, %null ] -// ret i32 %7 -// } -// TODO: can this be cross-compiled? -Function* HashTable::codegen_hash_current_row(RuntimeState* state) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) { - return NULL; - } - } - - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get types to generate function prototype - Type* this_type = codegen->get_type(HashTable::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, "hash_current_row", codegen->get_type(TYPE_INT)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* this_arg = NULL; - Function* fn = prototype.generate_prototype(&builder, &this_arg); - - Value* hash_result = codegen->get_int_constant(TYPE_INT, _initial_seed); - Value* data = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), _expr_values_buffer); - if (_var_result_begin == -1) { - // No variable length slots, just hash what is in '_expr_values_buffer' - if (_results_buffer_size > 0) { - Function* hash_fn = codegen->get_hash_function(_results_buffer_size); - Value* len = codegen->get_int_constant(TYPE_INT, _results_buffer_size); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result); - } - } else { - if (_var_result_begin > 0) { - Function* hash_fn = codegen->get_hash_function(_var_result_begin); - Value* len = codegen->get_int_constant(TYPE_INT, _var_result_begin); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result); - } - - // Hash string slots - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - if (_build_expr_ctxs[i]->root()->type().type != TYPE_CHAR - && _build_expr_ctxs[i]->root()->type().type != TYPE_VARCHAR) { - continue; - } - - BasicBlock* null_block = NULL; - BasicBlock* not_null_block = NULL; - BasicBlock* continue_block = NULL; - Value* str_null_result = NULL; - - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - - // If the hash table stores nulls, we need to check if the stringval - // evaluated to NULL - if (_stores_nulls) { - null_block = BasicBlock::Create(context, "null", fn); - not_null_block = BasicBlock::Create(context, "not_null", fn); - continue_block = BasicBlock::Create(context, "continue", fn); - - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - Value* is_null = builder.CreateICmpNE( - null_byte, codegen->get_int_constant(TYPE_TINYINT, 0)); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // For null, we just want to call the hash function on the portion of - // the data - builder.SetInsertPoint(null_block); - Function* null_hash_fn = codegen->get_hash_function(sizeof(StringValue)); - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), loc); - Value* len = codegen->get_int_constant(TYPE_INT, sizeof(StringValue)); - str_null_result = builder.CreateCall3(null_hash_fn, llvm_loc, len, hash_result); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(not_null_block); - } - - // Convert _expr_values_buffer loc to llvm value - Value* str_val = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(TYPE_VARCHAR), loc); - - Value* ptr = builder.CreateStructGEP(str_val, 0, "ptr"); - Value* len = builder.CreateStructGEP(str_val, 1, "len"); - ptr = builder.CreateLoad(ptr); - len = builder.CreateLoad(len); - - // Call hash(ptr, len, hash_result); - Function* general_hash_fn = codegen->get_hash_function(); - Value* string_hash_result = - builder.CreateCall3(general_hash_fn, ptr, len, hash_result); - - if (_stores_nulls) { - builder.CreateBr(continue_block); - builder.SetInsertPoint(continue_block); - // Use phi node to reconcile that we could have come from the string-null - // path and string not null paths. - PHINode* phi_node = builder.CreatePHI(codegen->get_type(TYPE_INT), 2); - phi_node->addIncoming(string_hash_result, not_null_block); - phi_node->addIncoming(str_null_result, null_block); - hash_result = phi_node; - } else { - hash_result = string_hash_result; - } - } - } - - builder.CreateRet(hash_result); - return codegen->finalize_function(fn); -} - -// Codegen for HashTable::Equals. For a hash table with two exprs (string,int), the -// IR looks like: -// -// define i1 @Equals(%"class.impala::OldHashTable"* %this_ptr, -// %"class.impala::TupleRow"* %row) { -// entry: -// %result = call i64 @get_slot_ref(%"class.impala::ExprContext"* inttoptr -// (i64 146381856 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %0 = trunc i64 %result to i1 -// br i1 %0, label %null, label %not_null -// -// false_block: ; preds = %not_null2, %null1, %not_null, %null -// ret i1 false -// -// null: ; preds = %entry -// br i1 false, label %continue, label %false_block -// -// not_null: ; preds = %entry -// %1 = load i32* inttoptr (i64 104774368 to i32*) -// %2 = ashr i64 %result, 32 -// %3 = trunc i64 %2 to i32 -// %cmp_raw = icmp eq i32 %3, %1 -// br i1 %cmp_raw, label %continue, label %false_block -// -// continue: ; preds = %not_null, %null -// %result4 = call { i64, i8* } @get_slot_ref( -// %"class.impala::ExprContext"* inttoptr -// (i64 146381696 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %4 = extractvalue { i64, i8* } %result4, 0 -// %5 = trunc i64 %4 to i1 -// br i1 %5, label %null1, label %not_null2 -// -// null1: ; preds = %continue -// br i1 false, label %continue3, label %false_block -// -// not_null2: ; preds = %continue -// %6 = extractvalue { i64, i8* } %result4, 0 -// %7 = ashr i64 %6, 32 -// %8 = trunc i64 %7 to i32 -// %result5 = extractvalue { i64, i8* } %result4, 1 -// %cmp_raw6 = call i1 @_Z11StringValEQPciPKN6impala11StringValueE( -// i8* %result5, i32 %8, %"struct.impala::StringValue"* inttoptr -// (i64 104774384 to %"struct.impala::StringValue"*)) -// br i1 %cmp_raw6, label %continue3, label %false_block -// -// continue3: ; preds = %not_null2, %null1 -// ret i1 true -// } -Function* HashTable::codegen_equals(RuntimeState* state) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) { - return NULL; - } - } - - LlvmCodeGen* codegen = NULL; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(HashTable::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, "equals", codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* row = args[1]; - - if (!_build_expr_ctxs.empty()) { - BasicBlock* false_block = BasicBlock::Create(context, "false_block", fn); - - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // call GetValue on build_exprs[i] - Function* expr_fn = NULL; - Status status = _build_expr_ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - std::stringstream ss; - ss << "Problem with codegen: " << status.get_error_msg(); - // TODO(zc) - // state->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str())); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), _build_expr_ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, _build_expr_ctxs[i]->root()->type(), - expr_fn, expr_fn_args, "result", NULL); - Value* is_null = result.get_is_null(); - - // Determine if probe is null (i.e. _expr_value_null_bits[i] == true). In - // the case where the hash table does not store nulls, this is always false. - Value* probe_is_null = codegen->false_value(); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - if (_stores_nulls) { - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - probe_is_null = builder.CreateICmpNE( - null_byte, codegen->get_int_constant(TYPE_TINYINT, 0)); - } - - // Get llvm value for probe_val from '_expr_values_buffer' - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* probe_val = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(_build_expr_ctxs[i]->root()->type()), loc); - - // Branch for GetValue() returning NULL - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - builder.CreateCondBr(probe_is_null, continue_block, false_block); - - // Not-null block - builder.SetInsertPoint(not_null_block); - if (_stores_nulls) { - BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", fn); - // First need to compare that probe expr[i] is not null - builder.CreateCondBr(probe_is_null, false_block, cmp_block); - builder.SetInsertPoint(cmp_block); - } - // Check result == probe_val - Value* is_equal = result.eq_to_native_ptr(probe_val); - builder.CreateCondBr(is_equal, continue_block, false_block); - - builder.SetInsertPoint(continue_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - } else { - builder.CreateRet(codegen->true_value()); - } - - return codegen->finalize_function(fn); -} - } diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h index 0b3168871d..3d4b502975 100644 --- a/be/src/exec/hash_table.h +++ b/be/src/exec/hash_table.h @@ -25,17 +25,10 @@ #include "common/logging.h" #include "util/hash_util.hpp" -namespace llvm { - -class Function; - -} - namespace doris { class Expr; class ExprContext; -class LlvmCodeGen; class RowDescriptor; class Tuple; class TupleRow; @@ -179,21 +172,6 @@ public: return Iterator(); } - /// Codegen for evaluating a tuple row. Codegen'd function matches the signature - /// for EvalBuildRow and EvalTupleRow. - /// if build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs - llvm::Function* codegen_eval_tuple_row(RuntimeState* state, bool build_row); - - /// Codegen for hashing the expr values in '_expr_values_buffer'. Function - /// prototype matches hash_current_row identically. - llvm::Function* codegen_hash_current_row(RuntimeState* state); - - /// Codegen for evaluating a TupleRow and comparing equality against - /// '_expr_values_buffer'. Function signature matches HashTable::Equals() - llvm::Function* codegen_equals(RuntimeState* state); - - static const char* _s_llvm_class_name; - // Dump out the entire hash table to string. If skip_empty, empty buckets are // skipped. If build_desc is non-null, the build rows will be output. Otherwise // just the build row addresses. diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h index 82a7214211..758c9fa3fb 100644 --- a/be/src/exec/merge_join_node.h +++ b/be/src/exec/merge_join_node.h @@ -46,8 +46,6 @@ public: virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); virtual Status close(RuntimeState* state); - static const char* LLVM_CLASS_NAME; - protected: void debug_string(int indentation_level, std::stringstream* out) const; diff --git a/be/src/exec/new_partitioned_aggregation_node.cc b/be/src/exec/new_partitioned_aggregation_node.cc index 9ea0826135..689567cecf 100644 --- a/be/src/exec/new_partitioned_aggregation_node.cc +++ b/be/src/exec/new_partitioned_aggregation_node.cc @@ -23,7 +23,6 @@ #include //#include "codegen/codegen_anyval.h" -//#include "codegen/llvm_codegen.h" #include "exec/new_partitioned_hash_table.h" #include "exec/new_partitioned_hash_table.inline.h" #include "exprs/new_agg_fn_evaluator.h" @@ -50,14 +49,10 @@ #include "common/names.h" -// using namespace llvm; using namespace strings; namespace doris { -const char* NewPartitionedAggregationNode::LLVM_CLASS_NAME = - "class.doris::NewPartitionedAggregationNode"; - /// The minimum reduction factor (input rows divided by output rows) to grow hash tables /// in a streaming preaggregation, given that the hash tables are currently the given /// size or above. The sizes roughly correspond to hash table sizes where the bucket @@ -237,20 +232,6 @@ Status NewPartitionedAggregationNode::prepare(RuntimeState* state) { return Status::OK(); } -//void NewPartitionedAggregationNode::Codegen(RuntimeState* state) { -// DCHECK(state->ShouldCodegen()); -// ExecNode::Codegen(state); -// if (IsNodeCodegenDisabled()) return; -// -// LlvmCodeGen* codegen = state->codegen(); -// DCHECK(codegen != NULL); -// TPrefetchMode::type prefetch_mode = state_->query_options().prefetch_mode; -// Status codegen_status = is_streaming_preagg_ ? -// CodegenProcessBatchStreaming(codegen, prefetch_mode) : -// CodegenProcessBatch(codegen, prefetch_mode); -// runtime_profile()->AddCodegenMsg(codegen_status.ok(), codegen_status); -//} - Status NewPartitionedAggregationNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); // Open the child before consuming resources in this node. @@ -1470,605 +1451,6 @@ void NewPartitionedAggregationNode::ClosePartitions() { // return ExecNode::QueryMaintenance(state); //} -#if 0 - -// IR Generation for updating a single aggregation slot. Signature is: -// void UpdateSlot(FunctionContext* agg_fn_ctx, ExprContext* agg_expr_ctx, -// AggTuple* agg_tuple, char** row) -// -// The IR for sum(double_col), which is constructed directly with the IRBuilder, is: -// -// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"** %agg_expr_ctxs, -// { i8, [7 x i8], double }* %agg_tuple, %"class.impala::TupleRow"* %row) #34 { -// entry: -// %expr_ctx_ptr = getelementptr %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %agg_expr_ctxs, i32 0 -// %expr_ctx = load %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %expr_ctx_ptr -// %input0 = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx, -// %"class.impala::TupleRow"* %row) -// %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], double }, -// { i8, [7 x i8], double }* %agg_tuple, i32 0, i32 2 -// %dst_val = load double, double* %dst_slot_ptr -// %0 = extractvalue { i8, double } %input0, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %not_null -// -// ret: ; preds = %not_null, %entry -// ret void -// -// not_null: ; preds = %entry -// %val = extractvalue { i8, double } %input0, 1 -// %1 = fadd double %dst_val, %val -// %2 = bitcast { i8, [7 x i8], double }* %agg_tuple to i8* -// %null_byte_ptr = getelementptr i8, i8* %2, i32 0 -// %null_byte = load i8, i8* %null_byte_ptr -// %null_bit_cleared = and i8 %null_byte, -2 -// store i8 %null_bit_cleared, i8* %null_byte_ptr -// store double %1, double* %dst_slot_ptr -// br label %ret -// } -// -// The IR for min(timestamp_col), which uses the UDA interface, is: -// -// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"** %agg_expr_ctxs, -// { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple, -// %"class.impala::TupleRow"* %row) #34 { -// entry: -// %dst_lowered_ptr = alloca { i64, i64 } -// %input_lowered_ptr = alloca { i64, i64 } -// %expr_ctx_ptr = getelementptr %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %agg_expr_ctxs, i32 0 -// %expr_ctx = load %"class.impala::ExprContext"*, -// %"class.impala::ExprContext"** %expr_ctx_ptr -// %input0 = call { i64, i64 } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx, -// %"class.impala::TupleRow"* %row) -// %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], -// %"class.impala::TimestampValue" }, { i8, [7 x i8], -// %"class.impala::TimestampValue" }* %agg_tuple, i32 0, i32 2 -// %dst_val = load %"class.impala::TimestampValue", -// %"class.impala::TimestampValue"* %dst_slot_ptr -// %0 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* -// %null_byte_ptr = getelementptr i8, i8* %0, i32 0 -// %null_byte = load i8, i8* %null_byte_ptr -// %null_mask = and i8 %null_byte, 1 -// %is_null = icmp ne i8 %null_mask, 0 -// %is_null_ext = zext i1 %is_null to i64 -// %1 = or i64 0, %is_null_ext -// %dst = insertvalue { i64, i64 } zeroinitializer, i64 %1, 0 -// %time_of_day = extractvalue %"class.impala::TimestampValue" %dst_val, 0, 0, 0, 0 -// %dst1 = insertvalue { i64, i64 } %dst, i64 %time_of_day, 1 -// %date = extractvalue %"class.impala::TimestampValue" %dst_val, 1, 0, 0 -// %2 = extractvalue { i64, i64 } %dst1, 0 -// %3 = zext i32 %date to i64 -// %4 = shl i64 %3, 32 -// %5 = and i64 %2, 4294967295 -// %6 = or i64 %5, %4 -// %dst2 = insertvalue { i64, i64 } %dst1, i64 %6, 0 -// store { i64, i64 } %input0, { i64, i64 }* %input_lowered_ptr -// %input_unlowered_ptr = bitcast { i64, i64 }* %input_lowered_ptr -// to %"struct.impala_udf::TimestampVal"* -// store { i64, i64 } %dst2, { i64, i64 }* %dst_lowered_ptr -// %dst_unlowered_ptr = bitcast { i64, i64 }* %dst_lowered_ptr -// to %"struct.impala_udf::TimestampVal"* -// call void -// @_ZN6impala18AggregateFunctions3MinIN10impala_udf12TimestampValEEEvPNS2_15FunctionContextERKT_PS6_.2( -// %"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"struct.impala_udf::TimestampVal"* %input_unlowered_ptr, -// %"struct.impala_udf::TimestampVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i64 }, { i64, i64 }* %dst_lowered_ptr -// %7 = extractvalue { i64, i64 } %anyval_result, 1 -// %8 = insertvalue %"class.impala::TimestampValue" zeroinitializer, i64 %7, 0, 0, 0, 0 -// %9 = extractvalue { i64, i64 } %anyval_result, 0 -// %10 = ashr i64 %9, 32 -// %11 = trunc i64 %10 to i32 -// %12 = insertvalue %"class.impala::TimestampValue" %8, i32 %11, 1, 0, 0 -// %13 = extractvalue { i64, i64 } %anyval_result, 0 -// %result_is_null = trunc i64 %13 to i1 -// %14 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* -// %null_byte_ptr3 = getelementptr i8, i8* %14, i32 0 -// %null_byte4 = load i8, i8* %null_byte_ptr3 -// %null_bit_cleared = and i8 %null_byte4, -2 -// %15 = sext i1 %result_is_null to i8 -// %null_bit = and i8 %15, 1 -// %null_bit_set = or i8 %null_bit_cleared, %null_bit -// store i8 %null_bit_set, i8* %null_byte_ptr3 -// store %"class.impala::TimestampValue" %12, -// %"class.impala::TimestampValue"* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %entry -// ret void -// } -// -//Status NewPartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen, -// NewAggFnEvaluator* evaluator, int evaluator_idx, SlotDescriptor* slot_desc, -// Function** fn) { -// PointerType* fn_ctx_type = -// codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME); -// PointerType* expr_ctxs_type = -// codegen->GetPtrPtrType(codegen->GetType(ExprContext::LLVM_CLASS_NAME)); -// StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen); -// if (tuple_struct == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateSlot(): failed to generate " -// "intermediate tuple desc"); -// } -// PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_struct); -// PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME); -// -// // Create UpdateSlot prototype -// LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type()); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctx", fn_ctx_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_expr_ctxs", expr_ctxs_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); -// -// LlvmBuilder builder(codegen->context()); -// Value* args[4]; -// *fn = prototype.GeneratePrototype(&builder, &args[0]); -// Value* agg_fn_ctx_arg = args[0]; -// Value* agg_expr_ctxs_arg = args[1]; -// Value* agg_tuple_arg = args[2]; -// Value* row_arg = args[3]; -// -// DCHECK_GE(evaluator->input_expr_ctxs().size(), 1); -// vector input_vals; -// for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { -// ExprContext* agg_expr_ctx = evaluator->input_expr_ctxs()[i]; -// Expr* agg_expr = agg_expr_ctx->root(); -// Function* agg_expr_fn; -// RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(codegen, &agg_expr_fn)); -// DCHECK(agg_expr_fn != NULL); -// -// // Call expr function with the matching expr context to get src slot value. -// Value* expr_ctx_ptr = builder.CreateInBoundsGEP( -// agg_expr_ctxs_arg, codegen->GetIntConstant(TYPE_INT, i), "expr_ctx_ptr"); -// Value* expr_ctx = builder.CreateLoad(expr_ctx_ptr, "expr_ctx"); -// string input_name = Substitute("input$0", i); -// input_vals.push_back( -// CodegenAnyVal::CreateCallWrapped(codegen, &builder, agg_expr->type(), agg_expr_fn, -// ArrayRef({expr_ctx, row_arg}), input_name.c_str())); -// } -// -// NewAggFnEvaluator::AggregationOp agg_op = evaluator->agg_op(); -// const ColumnType& dst_type = evaluator->intermediate_type(); -// bool dst_is_int_or_float_or_bool = dst_type.IsIntegerType() -// || dst_type.IsFloatingPointType() || dst_type.IsBooleanType(); -// bool dst_is_numeric_or_bool = dst_is_int_or_float_or_bool || dst_type.IsDecimalType(); -// -// BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", *fn); -// -// // Emit the code to compute 'result' and set the NULL indicator if needed. First check -// // for special cases where we can emit a very simple instruction sequence, then fall -// // back to the general-purpose approach of calling the cross-compiled builtin UDA. -// CodegenAnyVal& src = input_vals[0]; -// // 'dst_slot_ptr' points to the slot in the aggregate tuple to update. -// Value* dst_slot_ptr = builder.CreateStructGEP( -// NULL, agg_tuple_arg, slot_desc->llvm_field_idx(), "dst_slot_ptr"); -// Value* result = NULL; -// Value* dst_value = builder.CreateLoad(dst_slot_ptr, "dst_val"); -// if (agg_op == NewAggFnEvaluator::COUNT) { -// src.CodegenBranchIfNull(&builder, ret_block); -// if (evaluator->is_merge()) { -// result = builder.CreateAdd(dst_value, src.GetVal(), "count_sum"); -// } else { -// result = builder.CreateAdd( -// dst_value, codegen->GetIntConstant(TYPE_BIGINT, 1), "count_inc"); -// } -// DCHECK(!slot_desc->is_nullable()); -// } else if ((agg_op == NewAggFnEvaluator::MIN || agg_op == NewAggFnEvaluator::MAX) -// && dst_is_numeric_or_bool) { -// bool is_min = agg_op == NewAggFnEvaluator::MIN; -// src.CodegenBranchIfNull(&builder, ret_block); -// Function* min_max_fn = codegen->CodegenMinMax(slot_desc->type(), is_min); -// Value* min_max_args[] = {dst_value, src.GetVal()}; -// result = -// builder.CreateCall(min_max_fn, min_max_args, is_min ? "min_value" : "max_value"); -// // Dst may have been NULL, make sure to unset the NULL bit. -// DCHECK(slot_desc->is_nullable()); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, codegen->false_value()); -// } else if (agg_op == NewAggFnEvaluator::SUM && dst_is_int_or_float_or_bool) { -// src.CodegenBranchIfNull(&builder, ret_block); -// if (dst_type.IsFloatingPointType()) { -// result = builder.CreateFAdd(dst_value, src.GetVal()); -// } else { -// result = builder.CreateAdd(dst_value, src.GetVal()); -// } -// // Dst may have been NULL, make sure to unset the NULL bit. -// DCHECK(slot_desc->is_nullable()); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, codegen->false_value()); -// } else { -// // The remaining cases are implemented using the UDA interface. -// // Create intermediate argument 'dst' from 'dst_value' -// CodegenAnyVal dst = CodegenAnyVal::GetNonNullVal(codegen, &builder, dst_type, "dst"); -// -// // For a subset of builtins we generate a different code sequence that exploits two -// // properties of the builtins. First, NULL input values can be skipped. Second, the -// // value of the slot was initialized in the right way in InitAggSlots() (e.g. 0 for -// // SUM) that we get the right result if UpdateSlot() pretends that the NULL bit of -// // 'dst' is unset. Empirically this optimisation makes TPC-H Q1 5-10% faster. -// bool special_null_handling = !evaluator->intermediate_type().IsStringType() -// && !evaluator->intermediate_type().IsTimestampType() -// && (agg_op == NewAggFnEvaluator::MIN || agg_op == NewAggFnEvaluator::MAX -// || agg_op == NewAggFnEvaluator::SUM || agg_op == NewAggFnEvaluator::AVG -// || agg_op == NewAggFnEvaluator::NDV); -// if (slot_desc->is_nullable()) { -// if (special_null_handling) { -// src.CodegenBranchIfNull(&builder, ret_block); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, codegen->false_value()); -// } else { -// dst.SetIsNull(slot_desc->CodegenIsNull(codegen, &builder, agg_tuple_arg)); -// } -// } -// dst.SetFromRawValue(dst_value); -// -// // Call the UDA to update/merge 'src' into 'dst', with the result stored in -// // 'updated_dst_val'. -// CodegenAnyVal updated_dst_val; -// RETURN_IF_ERROR(CodegenCallUda(codegen, &builder, evaluator, agg_fn_ctx_arg, -// input_vals, dst, &updated_dst_val)); -// result = updated_dst_val.ToNativeValue(); -// -// if (slot_desc->is_nullable() && !special_null_handling) { -// // Set NULL bit in the slot based on the return value. -// Value* result_is_null = updated_dst_val.GetIsNull("result_is_null"); -// slot_desc->CodegenSetNullIndicator( -// codegen, &builder, agg_tuple_arg, result_is_null); -// } -// } -// -// // TODO: Store to register in the loop and store once to memory at the end of the loop. -// builder.CreateStore(result, dst_slot_ptr); -// builder.CreateBr(ret_block); -// -// builder.SetInsertPoint(ret_block); -// builder.CreateRetVoid(); -// -// // Avoid producing huge UpdateTuple() function after inlining - LLVM's optimiser -// // memory/CPU usage scales super-linearly with function size. -// // E.g. compute stats on all columns of a 1000-column table previously took 4 minutes to -// // codegen because all the UpdateSlot() functions were inlined. -// if (evaluator_idx >= LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) { -// codegen->SetNoInline(*fn); -// } -// -// *fn = codegen->FinalizeFunction(*fn); -// if (*fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateSlot(): codegen'd " -// "UpdateSlot() function failed verification, see log"); -// } -// return Status::OK(); -//} -// -//Status NewPartitionedAggregationNode::CodegenCallUda(LlvmCodeGen* codegen, -// LlvmBuilder* builder, NewAggFnEvaluator* evaluator, Value* agg_fn_ctx_arg, -// const vector& input_vals, const CodegenAnyVal& dst, -// CodegenAnyVal* updated_dst_val) { -// DCHECK_EQ(evaluator->input_expr_ctxs().size(), input_vals.size()); -// Function* uda_fn; -// RETURN_IF_ERROR(evaluator->GetUpdateOrMergeFunction(codegen, &uda_fn)); -// -// // Set up arguments for call to UDA, which are the FunctionContext*, followed by -// // pointers to all input values, followed by a pointer to the destination value. -// vector uda_fn_args; -// uda_fn_args.push_back(agg_fn_ctx_arg); -// -// // Create pointers to input args to pass to uda_fn. We must use the unlowered type, -// // e.g. IntVal, because the UDA interface expects the values to be passed as const -// // references to the classes. -// for (int i = 0; i < evaluator->input_expr_ctxs().size(); ++i) { -// uda_fn_args.push_back(input_vals[i].GetUnloweredPtr("input_unlowered_ptr")); -// } -// -// // Create pointer to dst to pass to uda_fn. We must use the unlowered type for the -// // same reason as above. -// Value* dst_lowered_ptr = dst.GetLoweredPtr("dst_lowered_ptr"); -// const ColumnType& dst_type = evaluator->intermediate_type(); -// Type* dst_unlowered_ptr_type = CodegenAnyVal::GetUnloweredPtrType(codegen, dst_type); -// Value* dst_unlowered_ptr = builder->CreateBitCast( -// dst_lowered_ptr, dst_unlowered_ptr_type, "dst_unlowered_ptr"); -// uda_fn_args.push_back(dst_unlowered_ptr); -// -// // Call 'uda_fn' -// builder->CreateCall(uda_fn, uda_fn_args); -// -// // Convert intermediate 'dst_arg' back to the native type. -// Value* anyval_result = builder->CreateLoad(dst_lowered_ptr, "anyval_result"); -// -// *updated_dst_val = CodegenAnyVal(codegen, builder, dst_type, anyval_result); -// return Status::OK(); -//} - -// IR codegen for the UpdateTuple loop. This loop is query specific and based on the -// aggregate functions. The function signature must match the non- codegen'd UpdateTuple -// exactly. -// For the query: -// select count(*), count(int_col), sum(double_col) the IR looks like: -// -// ; Function Attrs: alwaysinline -// define void @UpdateTuple(%"class.impala::NewPartitionedAggregationNode"* %this_ptr, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, %"class.impala::Tuple"* -// %tuple, -// %"class.impala::TupleRow"* %row, i1 %is_merge) #34 { -// entry: -// %tuple1 = -// bitcast %"class.impala::Tuple"* %tuple to { i8, [7 x i8], i64, i64, double }* -// %src_slot = getelementptr inbounds { i8, [7 x i8], i64, i64, double }, -// { i8, [7 x i8], i64, i64, double }* %tuple1, i32 0, i32 2 -// %count_star_val = load i64, i64* %src_slot -// %count_star_inc = add i64 %count_star_val, 1 -// store i64 %count_star_inc, i64* %src_slot -// %0 = getelementptr %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 1 -// %agg_fn_ctx = load %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %0 -// %1 = call %"class.impala::ExprContext"** -// @_ZNK6impala26NewPartitionedAggregationNode18GetAggExprContextsEi( -// %"class.impala::NewPartitionedAggregationNode"* %this_ptr, i32 1) -// call void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx, -// %"class.impala::ExprContext"** %1, { i8, [7 x i8], i64, i64, double }* %tuple1, -// %"class.impala::TupleRow"* %row) -// %2 = getelementptr %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 2 -// %agg_fn_ctx2 = load %"class.impala_udf::FunctionContext"*, -// %"class.impala_udf::FunctionContext"** %2 -// %3 = call %"class.impala::ExprContext"** -// @_ZNK6impala26NewPartitionedAggregationNode18GetAggExprContextsEi( -// %"class.impala::NewPartitionedAggregationNode"* %this_ptr, i32 2) -// call void @UpdateSlot.4(%"class.impala_udf::FunctionContext"* %agg_fn_ctx2, -// %"class.impala::ExprContext"** %3, { i8, [7 x i8], i64, i64, double }* %tuple1, -// %"class.impala::TupleRow"* %row) -// ret void -// } -//Status NewPartitionedAggregationNode::CodegenUpdateTuple( -// LlvmCodeGen* codegen, Function** fn) { -// SCOPED_TIMER(codegen->codegen_timer()); -// -// for (const SlotDescriptor* slot_desc : intermediate_tuple_desc_->slots()) { -// if (slot_desc->type().type == TYPE_CHAR) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateTuple(): cannot codegen" -// "CHAR in aggregations"); -// } -// } -// -// if (intermediate_tuple_desc_->GetLlvmStruct(codegen) == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateTuple(): failed to generate " -// "intermediate tuple desc"); -// } -// -// // Get the types to match the UpdateTuple signature -// Type* agg_node_type = codegen->GetType(NewPartitionedAggregationNode::LLVM_CLASS_NAME); -// Type* fn_ctx_type = codegen->GetType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME); -// Type* tuple_type = codegen->GetType(Tuple::LLVM_CLASS_NAME); -// Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME); -// -// PointerType* agg_node_ptr_type = codegen->GetPtrType(agg_node_type); -// PointerType* fn_ctx_ptr_ptr_type = codegen->GetPtrPtrType(fn_ctx_type); -// PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_type); -// PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type); -// -// StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen); -// PointerType* tuple_ptr = codegen->GetPtrType(tuple_struct); -// LlvmCodeGen::FnPrototype prototype(codegen, "UpdateTuple", codegen->void_type()); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctxs", fn_ctx_ptr_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); -// prototype.AddArgument(LlvmCodeGen::NamedVariable("is_merge", codegen->boolean_type())); -// -// LlvmBuilder builder(codegen->context()); -// Value* args[5]; -// *fn = prototype.GeneratePrototype(&builder, &args[0]); -// Value* this_arg = args[0]; -// Value* agg_fn_ctxs_arg = args[1]; -// Value* tuple_arg = args[2]; -// Value* row_arg = args[3]; -// -// // Cast the parameter types to the internal llvm runtime types. -// // TODO: get rid of this by using right type in function signature -// tuple_arg = builder.CreateBitCast(tuple_arg, tuple_ptr, "tuple"); -// -// Function* get_expr_ctxs_fn = -// codegen->GetFunction(IRFunction::PART_AGG_NODE_GET_EXPR_CTXS, false); -// DCHECK(get_expr_ctxs_fn != NULL); -// -// // Loop over each expr and generate the IR for that slot. If the expr is not -// // count(*), generate a helper IR function to update the slot and call that. -// int j = grouping_expr_ctxs_.size(); -// for (int i = 0; i < aggregate_evaluators_.size(); ++i, ++j) { -// SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[j]; -// NewAggFnEvaluator* evaluator = aggregate_evaluators_[i]; -// if (evaluator->is_count_star()) { -// // TODO: we should be able to hoist this up to the loop over the batch and just -// // increment the slot by the number of rows in the batch. -// int field_idx = slot_desc->llvm_field_idx(); -// Value* const_one = codegen->GetIntConstant(TYPE_BIGINT, 1); -// Value* slot_ptr = builder.CreateStructGEP(NULL, tuple_arg, field_idx, "src_slot"); -// Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val"); -// Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc"); -// builder.CreateStore(count_inc, slot_ptr); -// } else { -// Function* update_slot_fn; -// RETURN_IF_ERROR( -// CodegenUpdateSlot(codegen, evaluator, i, slot_desc, &update_slot_fn)); -// Value* agg_fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i); -// Value* agg_fn_ctx = builder.CreateLoad(agg_fn_ctx_ptr, "agg_fn_ctx"); -// // Call GetExprCtx() to get the expression context. -// DCHECK(agg_expr_ctxs_[i] != NULL); -// Value* get_expr_ctxs_args[] = {this_arg, codegen->GetIntConstant(TYPE_INT, i)}; -// Value* agg_expr_ctxs = builder.CreateCall(get_expr_ctxs_fn, get_expr_ctxs_args); -// Value* update_slot_args[] = {agg_fn_ctx, agg_expr_ctxs, tuple_arg, row_arg}; -// builder.CreateCall(update_slot_fn, update_slot_args); -// } -// } -// builder.CreateRetVoid(); -// -// // Avoid inlining big UpdateTuple function into outer loop - we're unlikely to get -// // any benefit from it since the function call overhead will be amortized. -// if (aggregate_evaluators_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { -// codegen->SetNoInline(*fn); -// } -// -// // CodegenProcessBatch() does the final optimizations. -// *fn = codegen->FinalizeFunction(*fn); -// if (*fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenUpdateTuple(): codegen'd " -// "UpdateTuple() function failed verification, see log"); -// } -// return Status::OK(); -//} -// -//Status NewPartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen, -// TPrefetchMode::type prefetch_mode) { -// SCOPED_TIMER(codegen->codegen_timer()); -// -// Function* update_tuple_fn; -// RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn)); -// -// // Get the cross compiled update row batch function -// IRFunction::Type ir_fn = (!grouping_expr_ctxs_.empty() ? -// IRFunction::PART_AGG_NODE_PROCESS_BATCH_UNAGGREGATED : -// IRFunction::PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING); -// Function* process_batch_fn = codegen->GetFunction(ir_fn, true); -// DCHECK(process_batch_fn != NULL); -// -// int replaced; -// if (!grouping_expr_ctxs_.empty()) { -// // Codegen for grouping using hash table -// -// // Replace prefetch_mode with constant so branches can be optimised out. -// Value* prefetch_mode_arg = codegen->GetArgument(process_batch_fn, 3); -// prefetch_mode_arg->replaceAllUsesWith( -// ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode)); -// -// // The codegen'd ProcessBatch function is only used in Open() with level_ = 0, -// // so don't use murmur hash -// Function* hash_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, /* use murmur */ false, &hash_fn)); -// -// // Codegen HashTable::Equals -// Function* build_equals_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &build_equals_fn)); -// -// // Codegen for evaluating input rows -// Function* eval_grouping_expr_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_grouping_expr_fn)); -// -// // Replace call sites -// replaced = codegen->ReplaceCallSites(process_batch_fn, eval_grouping_expr_fn, -// "EvalProbeRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_fn, hash_fn, "HashRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_fn, build_equals_fn, "Equals"); -// DCHECK_EQ(replaced, 1); -// -// NewPartitionedHashTableCtx::HashTableReplacedConstants replaced_constants; -// const bool stores_duplicates = false; -// RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates, 1, -// process_batch_fn, &replaced_constants)); -// DCHECK_GE(replaced_constants.stores_nulls, 1); -// DCHECK_GE(replaced_constants.finds_some_nulls, 1); -// DCHECK_GE(replaced_constants.stores_duplicates, 1); -// DCHECK_GE(replaced_constants.stores_tuples, 1); -// DCHECK_GE(replaced_constants.quadratic_probing, 1); -// } -// -// replaced = codegen->ReplaceCallSites(process_batch_fn, update_tuple_fn, "UpdateTuple"); -// DCHECK_GE(replaced, 1); -// process_batch_fn = codegen->FinalizeFunction(process_batch_fn); -// if (process_batch_fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenProcessBatch(): codegen'd " -// "ProcessBatch() function failed verification, see log"); -// } -// -// void **codegened_fn_ptr = grouping_expr_ctxs_.empty() ? -// reinterpret_cast(&process_batch_no_grouping_fn_) : -// reinterpret_cast(&process_batch_fn_); -// codegen->AddFunctionToJit(process_batch_fn, codegened_fn_ptr); -// return Status::OK(); -//} -// -//Status NewPartitionedAggregationNode::CodegenProcessBatchStreaming( -// LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode) { -// DCHECK(is_streaming_preagg_); -// SCOPED_TIMER(codegen->codegen_timer()); -// -// IRFunction::Type ir_fn = IRFunction::PART_AGG_NODE_PROCESS_BATCH_STREAMING; -// Function* process_batch_streaming_fn = codegen->GetFunction(ir_fn, true); -// DCHECK(process_batch_streaming_fn != NULL); -// -// // Make needs_serialize arg constant so dead code can be optimised out. -// Value* needs_serialize_arg = codegen->GetArgument(process_batch_streaming_fn, 2); -// needs_serialize_arg->replaceAllUsesWith( -// ConstantInt::get(Type::getInt1Ty(codegen->context()), needs_serialize_)); -// -// // Replace prefetch_mode with constant so branches can be optimised out. -// Value* prefetch_mode_arg = codegen->GetArgument(process_batch_streaming_fn, 3); -// prefetch_mode_arg->replaceAllUsesWith( -// ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode)); -// -// Function* update_tuple_fn; -// RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn)); -// -// // We only use the top-level hash function for streaming aggregations. -// Function* hash_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, false, &hash_fn)); -// -// // Codegen HashTable::Equals -// Function* equals_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &equals_fn)); -// -// // Codegen for evaluating input rows -// Function* eval_grouping_expr_fn; -// RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_grouping_expr_fn)); -// -// // Replace call sites -// int replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, update_tuple_fn, -// "UpdateTuple"); -// DCHECK_EQ(replaced, 2); -// -// replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, eval_grouping_expr_fn, -// "EvalProbeRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, hash_fn, "HashRow"); -// DCHECK_EQ(replaced, 1); -// -// replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, equals_fn, "Equals"); -// DCHECK_EQ(replaced, 1); -// -// NewPartitionedHashTableCtx::HashTableReplacedConstants replaced_constants; -// const bool stores_duplicates = false; -// RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates, 1, -// process_batch_streaming_fn, &replaced_constants)); -// DCHECK_GE(replaced_constants.stores_nulls, 1); -// DCHECK_GE(replaced_constants.finds_some_nulls, 1); -// DCHECK_GE(replaced_constants.stores_duplicates, 1); -// DCHECK_GE(replaced_constants.stores_tuples, 1); -// DCHECK_GE(replaced_constants.quadratic_probing, 1); -// -// DCHECK(process_batch_streaming_fn != NULL); -// process_batch_streaming_fn = codegen->FinalizeFunction(process_batch_streaming_fn); -// if (process_batch_streaming_fn == NULL) { -// return Status::InternalError("NewPartitionedAggregationNode::CodegenProcessBatchStreaming(): codegen'd " -// "ProcessBatchStreaming() function failed verification, see log"); -// } -// -// codegen->AddFunctionToJit(process_batch_streaming_fn, -// reinterpret_cast(&process_batch_streaming_fn_)); -// return Status::OK(); -//} - -#endif - // Instantiate required templates. template Status NewPartitionedAggregationNode::AppendSpilledRow( Partition*, TupleRow*); diff --git a/be/src/exec/new_partitioned_aggregation_node.h b/be/src/exec/new_partitioned_aggregation_node.h index ac30c298d9..62a3da441e 100644 --- a/be/src/exec/new_partitioned_aggregation_node.h +++ b/be/src/exec/new_partitioned_aggregation_node.h @@ -30,19 +30,11 @@ #include "runtime/mem_pool.h" #include "runtime/string_value.h" -namespace llvm { -// class BasicBlock; -class Function; -// class Value; -} - namespace doris { class AggFn; class NewAggFnEvaluator; class CodegenAnyVal; -//class LlvmCodeGen; -//class LlvmBuilder; class RowBatch; class RuntimeState; struct StringValue; @@ -139,8 +131,6 @@ class NewPartitionedAggregationNode : public ExecNode { virtual Status reset(RuntimeState* state); virtual Status close(RuntimeState* state); - static const char* LLVM_CLASS_NAME; - protected: /// Frees local allocations from aggregate_evals_ and agg_fn_evals // virtual Status QueryMaintenance(RuntimeState* state); @@ -681,37 +671,6 @@ class NewPartitionedAggregationNode : public ExecNode { void CleanupHashTbl(const std::vector& agg_fn_evals, NewPartitionedHashTable::Iterator it); - /// Codegen UpdateSlot(). Returns non-OK status if codegen is unsuccessful. - /// Assumes is_merge = false; -// Status CodegenUpdateSlot(LlvmCodeGen* codegen, NewAggFnEvaluator* evaluator, -// int evaluator_idx, SlotDescriptor* slot_desc, llvm::Function** fn); - - /// Codegen a call to a function implementing the UDA interface with input values - /// from 'input_vals'. 'dst_val' should contain the previous value of the aggregate - /// function, and 'updated_dst_val' is set to the new value after the Update or Merge - /// operation is applied. The instruction sequence for the UDA call is inserted at - /// the insert position of 'builder'. -// Status CodegenCallUda(LlvmCodeGen* codegen, LlvmBuilder* builder, -// NewAggFnEvaluator* evaluator, llvm::Value* agg_fn_ctx_arg, -// const std::vector& input_vals, const CodegenAnyVal& dst_val, -// CodegenAnyVal* updated_dst_val); - - /// Codegen UpdateTuple(). Returns non-OK status if codegen is unsuccessful. -// Status CodegenUpdateTuple(LlvmCodeGen* codegen, llvm::Function** fn); - - /// Codegen the non-streaming process row batch loop. The loop has already been - /// compiled to IR and loaded into the codegen object. UpdateAggTuple has also been - /// codegen'd to IR. This function will modify the loop subsituting the statically - /// compiled functions with codegen'd ones. 'process_batch_fn_' or - /// 'process_batch_no_grouping_fn_' will be updated with the codegened function - /// depending on whether this is a grouping or non-grouping aggregation. - /// Assumes AGGREGATED_ROWS = false. -// Status CodegenProcessBatch(LlvmCodeGen* codegen); - - /// Codegen the materialization loop for streaming preaggregations. - /// 'process_batch_streaming_fn_' will be updated with the codegened function. -// Status CodegenProcessBatchStreaming(LlvmCodeGen* codegen); - /// Compute minimum buffer reservation for grouping aggregations. /// We need one buffer per partition, which is used either as the write buffer for the /// aggregated stream or the unaggregated stream. We need an additional buffer to read diff --git a/be/src/exec/new_partitioned_hash_table.cc b/be/src/exec/new_partitioned_hash_table.cc index 7cf735d94a..0c195257c2 100644 --- a/be/src/exec/new_partitioned_hash_table.cc +++ b/be/src/exec/new_partitioned_hash_table.cc @@ -21,8 +21,6 @@ #include #include -#include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exec/exec_node.h" #include "exprs/expr.h" #include "exprs/expr_context.h" @@ -37,13 +35,10 @@ #include "common/names.h" using namespace doris; -// using namespace llvm; using namespace strings; // DEFINE_bool(enable_quadratic_probing, true, "Enable quadratic probing hash table"); -const char* NewPartitionedHashTableCtx::LLVM_CLASS_NAME = "class.doris::NewPartitionedHashTableCtx"; - // Random primes to multiply the seed with. static uint32_t SEED_PRIMES[] = { 1, // First seed must be 1, level 0 is used by other operators in the fragment. @@ -623,642 +618,3 @@ string NewPartitionedHashTable::PrintStats() const { ss << "Resizes: " << num_resizes_ << std::endl; return ss.str(); } - -#if 0 - -// Helper function to store a value into the results buffer if the expr -// evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so -// we'll pick a more random value. -static void CodegenAssignNullValue( - LlvmCodeGen* codegen, LlvmBuilder* builder, Value* dst, const ColumnType& type) { - uint64_t fnv_seed = HashUtil::FNV_SEED; - - if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) { - Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr"); - Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len"); - Value* null_len = codegen->GetIntConstant(TYPE_INT, fnv_seed); - Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); - builder->CreateStore(null_ptr, dst_ptr); - builder->CreateStore(null_len, dst_len); - } else { - Value* null_value = NULL; - int byte_size = type.GetByteSize(); - // Get a type specific representation of fnv_seed - switch (type.type) { - case TYPE_BOOLEAN: - // In results, booleans are stored as 1 byte - dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->GetIntConstant(TYPE_TINYINT, fnv_seed); - break; - case TYPE_TIMESTAMP: { - // Cast 'dst' to 'i128*' - DCHECK_EQ(byte_size, 16); - PointerType* fnv_seed_ptr_type = - codegen->GetPtrType(Type::getIntNTy(codegen->context(), byte_size * 8)); - dst = builder->CreateBitCast(dst, fnv_seed_ptr_type); - null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); - break; - } - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_DECIMAL: - null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed); - break; - case TYPE_FLOAT: { - // Don't care about the value, just the bit pattern - float fnv_seed_float = *reinterpret_cast(&fnv_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_float)); - break; - } - case TYPE_DOUBLE: { - // Don't care about the value, just the bit pattern - double fnv_seed_double = *reinterpret_cast(&fnv_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_double)); - break; - } - default: - DCHECK(false); - } - builder->CreateStore(null_value, dst); - } -} - -// Codegen for evaluating a tuple row over either build_expr_ctxs_ or probe_expr_ctxs_. -// For a group by with (big int, string) the IR looks like: -// -// define i1 @EvalProbeRow(%"class.impala::NewPartitionedHashTableCtx"* %this_ptr, -// %"class.impala::TupleRow"* %row, i8* %expr_values, i8* %expr_values_null) #34 { -// entry: -// %loc_addr = getelementptr i8, i8* %expr_values, i32 0 -// %loc = bitcast i8* %loc_addr to i64* -// %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"* -// inttoptr (i64 197737664 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %0 = extractvalue { i8, i64 } %result, 0 -// %is_null = trunc i8 %0 to i1 -// %1 = zext i1 %is_null to i8 -// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0 -// store i8 %1, i8* %null_byte_loc -// br i1 %is_null, label %null, label %not_null -// -// null: ; preds = %entry -// store i64 2166136261, i64* %loc -// br label %continue -// -// not_null: ; preds = %entry -// %val = extractvalue { i8, i64 } %result, 1 -// store i64 %val, i64* %loc -// br label %continue -// -// continue: ; preds = %not_null, %null -// %is_null_phi = phi i1 [ true, %null ], [ false, %not_null ] -// %has_null = or i1 false, %is_null_phi -// %loc_addr1 = getelementptr i8, i8* %expr_values, i32 8 -// %loc2 = bitcast i8* %loc_addr1 to %"struct.impala::StringValue"* -// %result6 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"* -// inttoptr (i64 197738048 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %2 = extractvalue { i64, i8* } %result6, 0 -// %is_null7 = trunc i64 %2 to i1 -// %3 = zext i1 %is_null7 to i8 -// %null_byte_loc8 = getelementptr i8, i8* %expr_values_null, i32 1 -// store i8 %3, i8* %null_byte_loc8 -// br i1 %is_null7, label %null3, label %not_null4 -// -// null3: ; preds = %continue -// %string_ptr = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %loc2, i32 0, i32 0 -// %string_len = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %loc2, i32 0, i32 1 -// store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr -// store i32 -2128831035, i32* %string_len -// br label %continue5 -// -// not_null4: ; preds = %continue -// %4 = extractvalue { i64, i8* } %result6, 0 -// %5 = ashr i64 %4, 32 -// %6 = trunc i64 %5 to i32 -// %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %6, 1 -// %result9 = extractvalue { i64, i8* } %result6, 1 -// %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result9, 0 -// store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"* %loc2 -// br label %continue5 -// -// continue5: ; preds = %not_null4, %null3 -// %is_null_phi10 = phi i1 [ true, %null3 ], [ false, %not_null4 ] -// %has_null11 = or i1 %has_null, %is_null_phi10 -// ret i1 %has_null11 -// } -// -// For each expr, we create 3 code blocks. The null, not null and continue blocks. -// Both the null and not null branch into the continue block. The continue block -// becomes the start of the next block for codegen (either the next expr or just the -// end of the function). -Status NewPartitionedHashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build, Function** fn) { - const vector& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_; - for (int i = 0; i < ctxs.size(); ++i) { - // Disable codegen for CHAR - if (ctxs[i]->root()->type().type == TYPE_CHAR) { - return Status::InternalError("NewPartitionedHashTableCtx::CodegenEvalRow(): CHAR NYI"); - } - } - - // Get types to generate function prototype - Type* this_type = codegen->GetType(NewPartitionedHashTableCtx::LLVM_CLASS_NAME); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = codegen->GetPtrType(this_type); - Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type); - LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow", - codegen->GetType(TYPE_BOOLEAN)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type())); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type())); - - LLVMContext& context = codegen->context(); - LlvmBuilder builder(context); - Value* args[4]; - *fn = prototype.GeneratePrototype(&builder, args); - Value* this_ptr = args[0]; - Value* row = args[1]; - Value* expr_values = args[2]; - Value* expr_values_null = args[3]; - Value* has_null = codegen->false_value(); - - // ctx_vector = &build_expr_ctxs_[0] / ctx_vector = &probe_expr_ctxs_[0] - Value* ctx_vector = codegen->CodegenCallFunction(&builder, build ? - IRFunction::HASH_TABLE_GET_BUILD_EXPR_CTX : - IRFunction::HASH_TABLE_GET_PROBE_EXPR_CTX, - this_ptr, "ctx_vector"); - - for (int i = 0; i < ctxs.size(); ++i) { - // TODO: refactor this to somewhere else? This is not hash table specific except for - // the null handling bit and would be used for anyone that needs to materialize a - // vector of exprs - // Convert result buffer to llvm ptr type - int offset = expr_values_cache_.expr_values_offsets(i); - Value* loc = builder.CreateInBoundsGEP( - NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc_addr"); - Value* llvm_loc = builder.CreatePointerCast( - loc, codegen->GetPtrType(ctxs[i]->root()->type()), "loc"); - - BasicBlock* null_block = BasicBlock::Create(context, "null", *fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", *fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", *fn); - - // Call expr - Function* expr_fn; - Status status = ctxs[i]->root()->GetCodegendComputeFn(codegen, &expr_fn); - if (!status.ok()) { - (*fn)->eraseFromParent(); // deletes function - *fn = NULL; - return Status::InternalError(Substitute( - "Problem with NewPartitionedHashTableCtx::CodegenEvalRow(): $0", status.GetDetail())); - } - - // Avoid bloating function by inlining too many exprs into it. - if (i >= LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) { - codegen->SetNoInline(expr_fn); - } - - Value* expr_ctx = codegen->CodegenArrayAt(&builder, ctx_vector, i, "expr_ctx"); - CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped( - codegen, &builder, ctxs[i]->root()->type(), expr_fn, {expr_ctx, row}, "result"); - Value* is_null = result.GetIsNull(); - - // Set null-byte result - Value* null_byte = builder.CreateZExt(is_null, codegen->GetType(TYPE_TINYINT)); - Value* llvm_null_byte_loc = builder.CreateInBoundsGEP( - NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc"); - builder.CreateStore(null_byte, llvm_null_byte_loc); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - if (!stores_nulls_) { - // hash table doesn't store nulls, no reason to keep evaluating exprs - builder.CreateRet(codegen->true_value()); - } else { - CodegenAssignNullValue(codegen, &builder, llvm_loc, ctxs[i]->root()->type()); - builder.CreateBr(continue_block); - } - - // Not null block - builder.SetInsertPoint(not_null_block); - result.ToNativePtr(llvm_loc); - builder.CreateBr(continue_block); - - // Continue block - builder.SetInsertPoint(continue_block); - if (stores_nulls_) { - // Update has_null - PHINode* is_null_phi = builder.CreatePHI(codegen->boolean_type(), 2, "is_null_phi"); - is_null_phi->addIncoming(codegen->true_value(), null_block); - is_null_phi->addIncoming(codegen->false_value(), not_null_block); - has_null = builder.CreateOr(has_null, is_null_phi, "has_null"); - } - } - builder.CreateRet(has_null); - - // Avoid inlining a large EvalRow() function into caller. - if (ctxs.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { - codegen->SetNoInline(*fn); - } - - *fn = codegen->FinalizeFunction(*fn); - if (*fn == NULL) { - return Status::InternalError("Codegen'd NewPartitionedHashTableCtx::EvalRow() function failed verification, " - "see log"); - } - return Status::OK(); -} - -// Codegen for hashing the current row. In the case with both string and non-string data -// (group by int_col, string_col), the IR looks like: -// -// define i32 @HashRow(%"class.impala::NewPartitionedHashTableCtx"* %this_ptr, i8* %expr_values, -// i8* %expr_values_null) #34 { -// entry: -// %seed = call i32 @_ZNK6impala12NewPartitionedHashTableCtx11GetHashSeedEv( -// %"class.impala::NewPartitionedHashTableCtx"* %this_ptr) -// %hash = call i32 @CrcHash8(i8* %expr_values, i32 8, i32 %seed) -// %loc_addr = getelementptr i8, i8* %expr_values, i32 8 -// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 1 -// %null_byte = load i8, i8* %null_byte_loc -// %is_null = icmp ne i8 %null_byte, 0 -// br i1 %is_null, label %null, label %not_null -// -// null: ; preds = %entry -// %str_null = call i32 @CrcHash16(i8* %loc_addr, i32 16, i32 %hash) -// br label %continue -// -// not_null: ; preds = %entry -// %str_val = bitcast i8* %loc_addr to %"struct.impala::StringValue"* -// %0 = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %str_val, i32 0, i32 0 -// %1 = getelementptr inbounds %"struct.impala::StringValue", -// %"struct.impala::StringValue"* %str_val, i32 0, i32 1 -// %ptr = load i8*, i8** %0 -// %len = load i32, i32* %1 -// %string_hash = call i32 @IrCrcHash(i8* %ptr, i32 %len, i32 %hash) -// br label %continue -// -// continue: ; preds = %not_null, %null -// %hash_phi = phi i32 [ %string_hash, %not_null ], [ %str_null, %null ] -// ret i32 %hash_phi -// } -Status NewPartitionedHashTableCtx::CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, Function** fn) { - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - // Disable codegen for CHAR - if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) { - return Status::InternalError("NewPartitionedHashTableCtx::CodegenHashRow(): CHAR NYI"); - } - } - - // Get types to generate function prototype - Type* this_type = codegen->GetType(NewPartitionedHashTableCtx::LLVM_CLASS_NAME); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = codegen->GetPtrType(this_type); - - LlvmCodeGen::FnPrototype prototype( - codegen, (use_murmur ? "MurmurHashRow" : "HashRow"), codegen->GetType(TYPE_INT)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type())); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type())); - - LLVMContext& context = codegen->context(); - LlvmBuilder builder(context); - Value* args[3]; - *fn = prototype.GeneratePrototype(&builder, args); - Value* this_arg = args[0]; - Value* expr_values = args[1]; - Value* expr_values_null = args[2]; - - // Call GetHashSeed() to get seeds_[level_] - Value* seed = codegen->CodegenCallFunction(&builder, - IRFunction::HASH_TABLE_GET_HASH_SEED, this_arg, "seed"); - - Value* hash_result = seed; - const int var_result_offset = expr_values_cache_.var_result_offset(); - const int expr_values_bytes_per_row = expr_values_cache_.expr_values_bytes_per_row(); - if (var_result_offset == -1) { - // No variable length slots, just hash what is in 'expr_expr_values_cache_' - if (expr_values_bytes_per_row > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(expr_values_bytes_per_row) : - codegen->GetHashFunction(expr_values_bytes_per_row); - Value* len = codegen->GetIntConstant(TYPE_INT, expr_values_bytes_per_row); - hash_result = builder.CreateCall( - hash_fn, ArrayRef({expr_values, len, hash_result}), "hash"); - } - } else { - if (var_result_offset > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(var_result_offset) : - codegen->GetHashFunction(var_result_offset); - Value* len = codegen->GetIntConstant(TYPE_INT, var_result_offset); - hash_result = builder.CreateCall( - hash_fn, ArrayRef({expr_values, len, hash_result}), "hash"); - } - - // Hash string slots - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING - && build_expr_ctxs_[i]->root()->type().type != TYPE_VARCHAR) continue; - - BasicBlock* null_block = NULL; - BasicBlock* not_null_block = NULL; - BasicBlock* continue_block = NULL; - Value* str_null_result = NULL; - - int offset = expr_values_cache_.expr_values_offsets(i); - Value* llvm_loc = builder.CreateInBoundsGEP( - NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc_addr"); - - // If the hash table stores nulls, we need to check if the stringval - // evaluated to NULL - if (stores_nulls_) { - null_block = BasicBlock::Create(context, "null", *fn); - not_null_block = BasicBlock::Create(context, "not_null", *fn); - continue_block = BasicBlock::Create(context, "continue", *fn); - - Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(NULL, expr_values_null, - codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc"); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte"); - Value* is_null = builder.CreateICmpNE( - null_byte, codegen->GetIntConstant(TYPE_TINYINT, 0), "is_null"); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // For null, we just want to call the hash function on the portion of - // the data - builder.SetInsertPoint(null_block); - Function* null_hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(sizeof(StringValue)) : - codegen->GetHashFunction(sizeof(StringValue)); - Value* len = codegen->GetIntConstant(TYPE_INT, sizeof(StringValue)); - str_null_result = builder.CreateCall(null_hash_fn, - ArrayRef({llvm_loc, len, hash_result}), "str_null"); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(not_null_block); - } - - // Convert expr_values_buffer_ loc to llvm value - Value* str_val = builder.CreatePointerCast(llvm_loc, - codegen->GetPtrType(TYPE_STRING), "str_val"); - - Value* ptr = builder.CreateStructGEP(NULL, str_val, 0); - Value* len = builder.CreateStructGEP(NULL, str_val, 1); - ptr = builder.CreateLoad(ptr, "ptr"); - len = builder.CreateLoad(len, "len"); - - // Call hash(ptr, len, hash_result); - Function* general_hash_fn = use_murmur ? codegen->GetMurmurHashFunction() : - codegen->GetHashFunction(); - Value* string_hash_result = builder.CreateCall(general_hash_fn, - ArrayRef({ptr, len, hash_result}), "string_hash"); - - if (stores_nulls_) { - builder.CreateBr(continue_block); - builder.SetInsertPoint(continue_block); - // Use phi node to reconcile that we could have come from the string-null - // path and string not null paths. - PHINode* phi_node = builder.CreatePHI(codegen->GetType(TYPE_INT), 2, "hash_phi"); - phi_node->addIncoming(string_hash_result, not_null_block); - phi_node->addIncoming(str_null_result, null_block); - hash_result = phi_node; - } else { - hash_result = string_hash_result; - } - } - } - - builder.CreateRet(hash_result); - - // Avoid inlining into caller if there are many exprs. - if (build_expr_ctxs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { - codegen->SetNoInline(*fn); - } - *fn = codegen->FinalizeFunction(*fn); - if (*fn == NULL) { - return Status::InternalError( - "Codegen'd NewPartitionedHashTableCtx::HashRow() function failed verification, see log"); - } - return Status::OK(); -} - -// Codegen for NewPartitionedHashTableCtx::Equals. For a group by with (bigint, string), -// the IR looks like: -// -// define i1 @Equals(%"class.impala::NewPartitionedHashTableCtx"* %this_ptr, %"class.impala::TupleRow"* -// %row, -// i8* %expr_values, i8* %expr_values_null) #34 { -// entry: -// %0 = alloca { i64, i8* } -// %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"* -// inttoptr (i64 139107136 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %1 = extractvalue { i8, i64 } %result, 0 -// %is_null = trunc i8 %1 to i1 -// %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0 -// %2 = load i8, i8* %null_byte_loc -// %3 = icmp ne i8 %2, 0 -// %loc = getelementptr i8, i8* %expr_values, i32 0 -// %row_val = bitcast i8* %loc to i64* -// br i1 %is_null, label %null, label %not_null -// -// false_block: ; preds = %cmp9, %not_null2, %null1, -// %cmp, %not_null, %null -// ret i1 false -// -// null: ; preds = %entry -// br i1 %3, label %continue, label %false_block -// -// not_null: ; preds = %entry -// br i1 %3, label %false_block, label %cmp -// -// continue: ; preds = %cmp, %null -// %result4 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"* -// inttoptr (i64 139107328 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %4 = extractvalue { i64, i8* } %result4, 0 -// %is_null5 = trunc i64 %4 to i1 -// %null_byte_loc6 = getelementptr i8, i8* %expr_values_null, i32 1 -// %5 = load i8, i8* %null_byte_loc6 -// %6 = icmp ne i8 %5, 0 -// %loc7 = getelementptr i8, i8* %expr_values, i32 8 -// %row_val8 = bitcast i8* %loc7 to %"struct.impala::StringValue"* -// br i1 %is_null5, label %null1, label %not_null2 -// -// cmp: ; preds = %not_null -// %7 = load i64, i64* %row_val -// %val = extractvalue { i8, i64 } %result, 1 -// %cmp_raw = icmp eq i64 %val, %7 -// br i1 %cmp_raw, label %continue, label %false_block -// -// null1: ; preds = %continue -// br i1 %6, label %continue3, label %false_block -// -// not_null2: ; preds = %continue -// br i1 %6, label %false_block, label %cmp9 -// -// continue3: ; preds = %cmp9, %null1 -// ret i1 true -// -// cmp9: ; preds = %not_null2 -// store { i64, i8* } %result4, { i64, i8* }* %0 -// %8 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"* -// %cmp_raw10 = call i1 -// @_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE( -// %"struct.impala_udf::StringVal"* %8, %"struct.impala::StringValue"* %row_val8) -// br i1 %cmp_raw10, label %continue3, label %false_block -// } -Status NewPartitionedHashTableCtx::CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality, - Function** fn) { - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - // Disable codegen for CHAR - if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) { - return Status::InternalError("NewPartitionedHashTableCtx::CodegenEquals(): CHAR NYI"); - } - } - - // Get types to generate function prototype - Type* this_type = codegen->GetType(NewPartitionedHashTableCtx::LLVM_CLASS_NAME); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = codegen->GetPtrType(this_type); - Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type); - - LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->GetType(TYPE_BOOLEAN)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type())); - prototype.AddArgument( - LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type())); - - LLVMContext& context = codegen->context(); - LlvmBuilder builder(context); - Value* args[4]; - *fn = prototype.GeneratePrototype(&builder, args); - Value* this_ptr = args[0]; - Value* row = args[1]; - Value* expr_values = args[2]; - Value* expr_values_null = args[3]; - - // ctx_vector = &build_expr_ctxs_[0] - Value* ctx_vector = codegen->CodegenCallFunction(&builder, - IRFunction::HASH_TABLE_GET_BUILD_EXPR_CTX, this_ptr, "ctx_vector"); - - BasicBlock* false_block = BasicBlock::Create(context, "false_block", *fn); - for (int i = 0; i < build_expr_ctxs_.size(); ++i) { - BasicBlock* null_block = BasicBlock::Create(context, "null", *fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", *fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", *fn); - - // call GetValue on build_exprs[i] - Function* expr_fn; - Status status = build_expr_ctxs_[i]->root()->GetCodegendComputeFn(codegen, &expr_fn); - if (!status.ok()) { - (*fn)->eraseFromParent(); // deletes function - *fn = NULL; - return Status::InternalError( - Substitute("Problem with NewPartitionedHashTableCtx::CodegenEquals: $0", status.GetDetail())); - } - if (build_expr_ctxs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) { - // Avoid bloating function by inlining too many exprs into it. - codegen->SetNoInline(expr_fn); - } - - // Load ExprContext*: expr_ctx = ctx_vector[i]; - Value* expr_ctx = codegen->CodegenArrayAt(&builder, ctx_vector, i, "expr_ctx"); - - // Evaluate the expression. - CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder, - build_expr_ctxs_[i]->root()->type(), expr_fn, {expr_ctx, row}, "result"); - Value* is_null = result.GetIsNull(); - - // Determine if row is null (i.e. expr_values_null[i] == true). In - // the case where the hash table does not store nulls, this is always false. - Value* row_is_null = codegen->false_value(); - - // We consider null values equal if we are comparing build rows or if the join - // predicate is <=> - if (force_null_equality || finds_nulls_[i]) { - Value* llvm_null_byte_loc = builder.CreateInBoundsGEP( - NULL, expr_values_null, codegen->GetIntConstant(TYPE_INT, i), "null_byte_loc"); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - row_is_null = - builder.CreateICmpNE(null_byte, codegen->GetIntConstant(TYPE_TINYINT, 0)); - } - - // Get llvm value for row_val from 'expr_values' - int offset = expr_values_cache_.expr_values_offsets(i); - Value* loc = builder.CreateInBoundsGEP( - NULL, expr_values, codegen->GetIntConstant(TYPE_INT, offset), "loc"); - Value* row_val = builder.CreatePointerCast( - loc, codegen->GetPtrType(build_expr_ctxs_[i]->root()->type()), "row_val"); - - // Branch for GetValue() returning NULL - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - builder.CreateCondBr(row_is_null, continue_block, false_block); - - // Not-null block - builder.SetInsertPoint(not_null_block); - if (stores_nulls_) { - BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", *fn); - // First need to compare that row expr[i] is not null - builder.CreateCondBr(row_is_null, false_block, cmp_block); - builder.SetInsertPoint(cmp_block); - } - // Check result == row_val - Value* is_equal = result.EqToNativePtr(row_val); - builder.CreateCondBr(is_equal, continue_block, false_block); - - builder.SetInsertPoint(continue_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - - // Avoid inlining into caller if it is large. - if (build_expr_ctxs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) { - codegen->SetNoInline(*fn); - } - *fn = codegen->FinalizeFunction(*fn); - if (*fn == NULL) { - return Status::InternalError("Codegen'd NewPartitionedHashTableCtx::Equals() function failed verification, " - "see log"); - } - return Status::OK(); -} - -Status NewPartitionedHashTableCtx::ReplaceHashTableConstants(LlvmCodeGen* codegen, - bool stores_duplicates, int num_build_tuples, Function* fn, - HashTableReplacedConstants* replacement_counts) { - - replacement_counts->stores_nulls = codegen->ReplaceCallSitesWithBoolConst( - fn, stores_nulls(), "stores_nulls"); - replacement_counts->finds_some_nulls = codegen->ReplaceCallSitesWithBoolConst( - fn, finds_some_nulls(), "finds_some_nulls"); - replacement_counts->stores_tuples = codegen->ReplaceCallSitesWithBoolConst( - fn, num_build_tuples == 1, "stores_tuples"); - replacement_counts->stores_duplicates = codegen->ReplaceCallSitesWithBoolConst( - fn, stores_duplicates, "stores_duplicates"); - replacement_counts->quadratic_probing = codegen->ReplaceCallSitesWithBoolConst( - fn, FLAGS_enable_quadratic_probing, "quadratic_probing"); - return Status::OK(); -} - -#endif - diff --git a/be/src/exec/new_partitioned_hash_table.h b/be/src/exec/new_partitioned_hash_table.h index fda89dbff6..e4faf7a95a 100644 --- a/be/src/exec/new_partitioned_hash_table.h +++ b/be/src/exec/new_partitioned_hash_table.h @@ -33,15 +33,10 @@ #include "util/bitmap.h" #include "util/hash_util.hpp" -namespace llvm { - class Function; -} - namespace doris { class Expr; class ExprContext; -class LlvmCodeGen; class MemTracker; class RowDescriptor; class RuntimeState; @@ -169,24 +164,6 @@ class NewPartitionedHashTableCtx { bool IR_ALWAYS_INLINE EvalAndHashBuild(TupleRow* row); bool IR_ALWAYS_INLINE EvalAndHashProbe(TupleRow* row); - /// Codegen for evaluating a tuple row. Codegen'd function matches the signature - /// for EvalBuildRow and EvalTupleRow. - /// If build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs. - Status CodegenEvalRow(LlvmCodeGen* codegen, bool build_row, llvm::Function** fn); - - /// Codegen for evaluating a TupleRow and comparing equality. Function signature - /// matches HashTable::Equals(). 'force_null_equality' is true if the generated - /// equality function should treat all NULLs as equal. See the template parameter - /// to HashTable::Equals(). - Status CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality, - llvm::Function** fn); - - /// Codegen for hashing expr values. Function prototype matches HashRow identically. - /// Unlike HashRow(), the returned function only uses a single hash function, rather - /// than switching based on level_. If 'use_murmur' is true, murmur hash is used, - /// otherwise CRC is used if the hardware supports it (see hash-util.h). - Status CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, llvm::Function** fn); - /// Struct that returns the number of constants replaced by ReplaceConstants(). struct HashTableReplacedConstants { int stores_nulls; @@ -196,15 +173,6 @@ class NewPartitionedHashTableCtx { int quadratic_probing; }; - /// Replace hash table parameters with constants in 'fn'. Updates 'replacement_counts' - /// with the number of replacements made. 'num_build_tuples' and 'stores_duplicates' - /// correspond to HashTable parameters with the same name. - Status ReplaceHashTableConstants(LlvmCodeGen* codegen, bool stores_duplicates, - int num_build_tuples, llvm::Function* fn, - HashTableReplacedConstants* replacement_counts); - - static const char* LLVM_CLASS_NAME; - /// To enable prefetching, the hash table building and probing are pipelined by the /// exec nodes. A set of rows in a row batch will be evaluated and hashed first and /// the corresponding hash table buckets are prefetched before they are probed against @@ -483,11 +451,6 @@ class NewPartitionedHashTableCtx { bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; } bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; } - /// Cross-compiled function to access the build/probe expression context. - /// Called by generated LLVM IR functions such as Equals() and EvalRow(). - ExprContext* const* IR_ALWAYS_INLINE build_expr_evals() const; - ExprContext* const* IR_ALWAYS_INLINE probe_expr_evals() const; - const std::vector& build_exprs_; std::vector build_expr_evals_; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index db769c6ba4..921b539b49 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -24,7 +24,6 @@ #include #include -#include "codegen/llvm_codegen.h" #include "common/logging.h" #include "exprs/expr.h" #include "exprs/binary_predicate.h" @@ -42,8 +41,6 @@ #include "common/resource_tls.h" #include -using llvm::Function; - namespace doris { #define DS_SUCCESS(x) ((x) >= 0) @@ -178,17 +175,6 @@ Status OlapScanNode::prepare(RuntimeState* state) { _string_slots.push_back(slots[i]); } - if (state->codegen_level() > 0) { - LlvmCodeGen* codegen = NULL; - RETURN_IF_ERROR(state->get_codegen(&codegen)); - Function* codegen_eval_conjuncts_fn = codegen_eval_conjuncts(state, _conjunct_ctxs); - if (codegen_eval_conjuncts_fn != NULL) { - codegen->add_function_to_jit(codegen_eval_conjuncts_fn, - reinterpret_cast(&_eval_conjuncts_fn)); - // AddRuntimeExecOption("Probe Side Codegen Enabled"); - } - } - _runtime_state = state; return Status::OK(); } diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index a9bc315f64..8d31a9074a 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -21,8 +21,6 @@ #include #include -#include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exec/partitioned_hash_table.inline.h" #include "exprs/agg_fn_evaluator.h" #include "exprs/expr.h" @@ -43,14 +41,10 @@ #include "gen_cpp/Exprs_types.h" #include "gen_cpp/PlanNodes_types.h" -// using namespace llvm; using std::list; namespace doris { -const char* PartitionedAggregationNode::_s_llvm_class_name = - "class.doris::PartitionedAggregationNode"; - PartitionedAggregationNode::PartitionedAggregationNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), @@ -93,14 +87,6 @@ Status PartitionedAggregationNode::init(const TPlanNode& tnode, RuntimeState* st Status PartitionedAggregationNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - // Create the codegen object before preparing _conjunct_ctxs and _children, so that any - // ScalarFnCalls will use codegen. - // TODO: this is brittle and hard to reason about, revisit - // if (state->codegen_enabled()) { - // LlvmCodeGen* codegen; - // RETURN_IF_ERROR(state->get_codegen(&codegen)); - // } - RETURN_IF_ERROR(ExecNode::prepare(state)); _state = state; @@ -201,16 +187,6 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { DCHECK(_serialize_stream->has_write_block()); } - // if (state->codegen_enabled()) { - // LlvmCodeGen* codegen; - // RETURN_IF_ERROR(state->get_codegen(&codegen)); - // Function* codegen_process_row_batch_fn = codegen_process_batch(); - // if (codegen_process_row_batch_fn != NULL) { - // codegen->AddFunctionToJit(codegen_process_row_batch_fn, - // reinterpret_cast(&_process_row_batch_fn)); - // add_runtime_exec_option("Codegen Enabled"); - // } - // } return Status::OK(); } @@ -1111,461 +1087,7 @@ void PartitionedAggregationNode::close_partitions() { // } // return ExecNode::QueryMaintenance(state); // } - -// IR Generation for updating a single aggregation slot. Signature is: -// void UpdateSlot(FunctionContext* fn_ctx, AggTuple* agg_tuple, char** row) // -// The IR for sum(double_col) is: -// define void @UpdateSlot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, double }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 128241264 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, double }* %agg_tuple) -// %dst_val = load double* %dst_slot_ptr -// %val = extractvalue { i8, double } %src, 1 -// %1 = fadd double %dst_val, %val -// store double %1, double* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -// -// The IR for ndv(double_col) is: -// define void @UpdateSlot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, %"struct.doris::StringValue" }* %agg_tuple, -// %"class.doris::TupleRow"* %row) #20 { -// entry: -// %dst_lowered_ptr = alloca { i64, i8* } -// %src_lowered_ptr = alloca { i8, double } -// %src = call { i8, double } @GetSlotRef(%"class.doris::ExprContext"* inttoptr -// (i64 120530832 to %"class.doris::ExprContext"*), %"class.doris::TupleRow"* %row) -// %0 = extractvalue { i8, double } %src, 0 -// %is_null = trunc i8 %0 to i1 -// br i1 %is_null, label %ret, label %src_not_null -// -// src_not_null: ; preds = %entry -// %dst_slot_ptr = getelementptr inbounds -// { i8, %"struct.doris::StringValue" }* %agg_tuple, i32 0, i32 1 -// call void @SetNotNull({ i8, %"struct.doris::StringValue" }* %agg_tuple) -// %dst_val = load %"struct.doris::StringValue"* %dst_slot_ptr -// store { i8, double } %src, { i8, double }* %src_lowered_ptr -// %src_unlowered_ptr = bitcast { i8, double }* %src_lowered_ptr -// to %"struct.doris_udf::DoubleVal"* -// %ptr = extractvalue %"struct.doris::StringValue" %dst_val, 0 -// %dst_stringval = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1 -// %len = extractvalue %"struct.doris::StringValue" %dst_val, 1 -// %1 = extractvalue { i64, i8* } %dst_stringval, 0 -// %2 = zext i32 %len to i64 -// %3 = shl i64 %2, 32 -// %4 = and i64 %1, 4294967295 -// %5 = or i64 %4, %3 -// %dst_stringval1 = insertvalue { i64, i8* } %dst_stringval, i64 %5, 0 -// store { i64, i8* } %dst_stringval1, { i64, i8* }* %dst_lowered_ptr -// %dst_unlowered_ptr = bitcast { i64, i8* }* %dst_lowered_ptr -// to %"struct.doris_udf::StringVal"* -// call void @HllUpdate(%"class.doris_udf::FunctionContext"* %fn_ctx, -// %"struct.doris_udf::DoubleVal"* %src_unlowered_ptr, -// %"struct.doris_udf::StringVal"* %dst_unlowered_ptr) -// %anyval_result = load { i64, i8* }* %dst_lowered_ptr -// %6 = extractvalue { i64, i8* } %anyval_result, 1 -// %7 = insertvalue %"struct.doris::StringValue" zeroinitializer, i8* %6, 0 -// %8 = extractvalue { i64, i8* } %anyval_result, 0 -// %9 = ashr i64 %8, 32 -// %10 = trunc i64 %9 to i32 -// %11 = insertvalue %"struct.doris::StringValue" %7, i32 %10, 1 -// store %"struct.doris::StringValue" %11, %"struct.doris::StringValue"* %dst_slot_ptr -// br label %ret -// -// ret: ; preds = %src_not_null, %entry -// ret void -// } -llvm::Function* PartitionedAggregationNode::codegen_update_slot( - AggFnEvaluator* evaluator, SlotDescriptor* slot_desc) { - DCHECK(slot_desc->is_materialized()); - LlvmCodeGen* codegen = NULL; - if (!_state->get_codegen(&codegen).ok()) { - return NULL; - } - - DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1); - ExprContext* input_expr_ctx = evaluator->input_expr_ctxs()[0]; - Expr* input_expr = input_expr_ctx->root(); - - // TODO: implement timestamp - // if (input_expr->type().type == TYPE_TIMESTAMP && - // evaluator->agg_op() != AggFnEvaluator::AVG) { - // return NULL; - // } - - Function* agg_expr_fn = NULL; - Status status = input_expr->get_codegend_compute_fn(_state, &agg_expr_fn); - if (!status.ok()) { - VLOG_QUERY << "Could not codegen UpdateSlot(): " << status.get_error_msg(); - return NULL; - } - DCHECK(agg_expr_fn != NULL); - - PointerType* fn_ctx_type = - codegen->get_ptr_type(FunctionContextImpl::_s_llvm_functioncontext_name); - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - if (tuple_struct == NULL) return NULL; // Could not generate tuple struct - PointerType* tuple_ptr_type = PointerType::get(tuple_struct, 0); - PointerType* tuple_row_ptr_type = codegen->get_ptr_type(TupleRow::_s_llvm_class_name); - - // Create UpdateSlot prototype - LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[3]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - Value* fn_ctx_arg = args[0]; - Value* agg_tuple_arg = args[1]; - Value* row_arg = args[2]; - - BasicBlock* src_not_null_block = - BasicBlock::create(codegen->context(), "src_not_null", fn); - BasicBlock* ret_block = BasicBlock::create(codegen->context(), "ret", fn); - - // Call expr function to get src slot value - Value* expr_ctx = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), input_expr_ctx); - Value* agg_expr_fn_args[] = { expr_ctx, row_arg }; - CodegenAnyVal src = CodegenAnyVal::create_call_wrapped( - codegen, &builder, input_expr->type(), agg_expr_fn, agg_expr_fn_args, "src"); - - Value* src_is_null = src.get_is_null(); - builder.create_cond_br(src_is_null, ret_block, src_not_null_block); - - // Src slot is not null, update dst_slot - builder.set_insert_point(src_not_null_block); - Value* dst_ptr = - builder.create_struct_gep(agg_tuple_arg, slot_desc->field_idx(), "dst_slot_ptr"); - Value* result = NULL; - - if (slot_desc->is_nullable()) { - // Dst is NULL, just update dst slot to src slot and clear null bit - Function* clear_null_fn = slot_desc->CodegenUpdateNull(codegen, tuple_struct, false); - builder.CreateCall(clear_null_fn, agg_tuple_arg); - } - - // Update the slot - Value* dst_value = builder.CreateLoad(dst_ptr, "dst_val"); - switch (evaluator->agg_op()) { - case AggFnEvaluator::COUNT: - if (evaluator->is_merge()) { - result = builder.CreateAdd(dst_value, src.GetVal(), "count_sum"); - } else { - result = builder.CreateAdd(dst_value, - codegen->get_int_constant(TYPE_BIGINT, 1), "count_inc"); - } - break; - case AggFnEvaluator::MIN: { - Function* min_fn = codegen->CodegenMinMax(slot_desc->type(), true); - Value* min_args[] = { dst_value, src.GetVal() }; - result = builder.CreateCall(min_fn, min_args, "min_value"); - break; - } - case AggFnEvaluator::MAX: { - Function* max_fn = codegen->CodegenMinMax(slot_desc->type(), false); - Value* max_args[] = { dst_value, src.GetVal() }; - result = builder.CreateCall(max_fn, max_args, "max_value"); - break; - } - case AggFnEvaluator::SUM: - if (slot_desc->type().type != TYPE_DECIMAL && slot_desc->type().type != TYPE_DECIMALV2) { - if (slot_desc->type().type == TYPE_FLOAT || - slot_desc->type().type == TYPE_DOUBLE) { - result = builder.CreateFAdd(dst_value, src.GetVal()); - } else { - result = builder.CreateAdd(dst_value, src.GetVal()); - } - break; - } - DCHECK(slot_desc->type().type == TYPE_DECIMAL || slot_desc->type().type == TYPE_DECIMALV2); - // Fall through to xcompiled case - case AggFnEvaluator::AVG: - case AggFnEvaluator::NDV: { - // Get xcompiled update/merge function from IR module - const string& symbol = evaluator->is_merge() ? - evaluator->merge_symbol() : evaluator->update_symbol(); - Function* ir_fn = codegen->module()->getFunction(symbol); - DCHECK(ir_fn != NULL); - - // Create pointer to src to pass to ir_fn. We must use the unlowered type. - Value* src_lowered_ptr = codegen->CreateEntryBlockAlloca( - fn, LlvmCodeGen::NamedVariable("src_lowered_ptr", src.value()->getType())); - builder.CreateStore(src.value(), src_lowered_ptr); - Type* unlowered_ptr_type = - CodegenAnyVal::GetUnloweredPtrType(codegen, input_expr->type()); - Value* src_unlowered_ptr = - builder.CreateBitCast(src_lowered_ptr, unlowered_ptr_type, "src_unlowered_ptr"); - - // Create intermediate argument 'dst' from 'dst_value' - const ColumnType& dst_type = evaluator->intermediate_type(); - CodegenAnyVal dst = CodegenAnyVal::GetNonNullVal( - codegen, &builder, dst_type, "dst"); - dst.SetFromRawValue(dst_value); - // Create pointer to dst to pass to ir_fn. We must use the unlowered type. - Value* dst_lowered_ptr = codegen->CreateEntryBlockAlloca( - fn, LlvmCodeGen::NamedVariable("dst_lowered_ptr", dst.value()->getType())); - builder.CreateStore(dst.value(), dst_lowered_ptr); - unlowered_ptr_type = CodegenAnyVal::GetUnloweredPtrType(codegen, dst_type); - Value* dst_unlowered_ptr = - builder.CreateBitCast(dst_lowered_ptr, unlowered_ptr_type, "dst_unlowered_ptr"); - - // Call 'ir_fn' - builder.CreateCall3(ir_fn, fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr); - - // Convert StringVal intermediate 'dst_arg' back to StringValue - Value* anyval_result = builder.CreateLoad(dst_lowered_ptr, "anyval_result"); - result = CodegenAnyVal(codegen, &builder, dst_type, anyval_result).ToNativeValue(); - break; - } - default: - DCHECK(false) << "bad aggregate operator: " << evaluator->agg_op(); - } - - builder.CreateStore(result, dst_ptr); - builder.CreateBr(ret_block); - - builder.SetInsertPoint(ret_block); - builder.CreateRetVoid(); - - return codegen->FinalizeFunction(fn); -} - -// IR codegen for the update_tuple loop. This loop is query specific and based on the -// aggregate functions. The function signature must match the non- codegen'd update_tuple -// exactly. -// For the query: -// select count(*), count(int_col), sum(double_col) the IR looks like: -// - -// ; Function Attrs: alwaysinline -// define void @update_tuple(%"class.doris::PartitionedAggregationNode"* %this_ptr, -// %"class.doris_udf::FunctionContext"** %agg_fn_ctxs, -// %"class.doris::Tuple"* %tuple, -// %"class.doris::TupleRow"* %row, -// i1 %is_merge) #20 { -// entry: -// %tuple1 = bitcast %"class.doris::Tuple"* %tuple to { i8, i64, i64, double }* -// %src_slot = getelementptr inbounds { i8, i64, i64, double }* %tuple1, i32 0, i32 1 -// %count_star_val = load i64* %src_slot -// %count_star_inc = add i64 %count_star_val, 1 -// store i64 %count_star_inc, i64* %src_slot -// %0 = getelementptr %"class.doris_udf::FunctionContext"** %agg_fn_ctxs, i32 1 -// %fn_ctx = load %"class.doris_udf::FunctionContext"** %0 -// call void @UpdateSlot(%"class.doris_udf::FunctionContext"* %fn_ctx, -// { i8, i64, i64, double }* %tuple1, -// %"class.doris::TupleRow"* %row) -// %1 = getelementptr %"class.doris_udf::FunctionContext"** %agg_fn_ctxs, i32 2 -// %fn_ctx2 = load %"class.doris_udf::FunctionContext"** %1 -// call void @UpdateSlot5(%"class.doris_udf::FunctionContext"* %fn_ctx2, -// { i8, i64, i64, double }* %tuple1, -// %"class.doris::TupleRow"* %row) -// ret void -// } -Function* PartitionedAggregationNode::codegen_update_tuple() { - LlvmCodeGen* codegen = NULL; - if (!_state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - - int j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - - // Don't codegen things that aren't builtins (for now) - if (!evaluator->is_builtin()) { - return NULL; - } - - bool supported = true; - AggFnEvaluator::AggregationOp op = evaluator->agg_op(); - PrimitiveType type = slot_desc->type().type; - // Char and timestamp intermediates aren't supported - if (type == TYPE_TIMESTAMP || type == TYPE_CHAR) { - supported = false; - } - // Only AVG and NDV support string intermediates - if ((type == TYPE_STRING || type == TYPE_VARCHAR) && - !(op == AggFnEvaluator::AVG || op == AggFnEvaluator::NDV)) { - supported = false; - } - // Only SUM, AVG, and NDV support decimal intermediates - if (type == TYPE_DECIMAL && - !(op == AggFnEvaluator::SUM || op == AggFnEvaluator::AVG || - op == AggFnEvaluator::NDV)) { - supported = false; - } - if (type == TYPE_DECIMALV2 && - !(op == AggFnEvaluator::SUM || op == AggFnEvaluator::AVG || - op == AggFnEvaluator::NDV)) { - supported = false; - } - if (!supported) { - VLOG_QUERY << "Could not codegen update_tuple because intermediate type " - << slot_desc->type() - << " is not yet supported for aggregate function \"" - << evaluator->fn_name() << "()\""; - return NULL; - } - } - - if (_intermediate_tuple_desc->generate_llvm_struct(codegen) == NULL) { - VLOG_QUERY << "Could not codegen update_tuple because we could" - << "not generate a matching llvm struct for the intermediate tuple."; - return NULL; - } - - // Get the types to match the update_tuple signature - Type* agg_node_type = codegen->get_type(PartitionedAggregationNode::_s_llvm_class_name); - Type* fn_ctx_type = codegen->get_type(FunctionContextImpl::_s_llvm_functioncontext_name); - Type* tuple_type = codegen->get_type(Tuple::_s_llvm_class_name); - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - - PointerType* agg_node_ptr_type = agg_node_type->getPointerTo(); - PointerType* fn_ctx_ptr_ptr_type = fn_ctx_type->getPointerTo()->getPointerTo(); - PointerType* tuple_ptr_type = tuple_type->getPointerTo(); - PointerType* tuple_row_ptr_type = tuple_row_type->getPointerTo(); - - StructType* tuple_struct = _intermediate_tuple_desc->generate_llvm_struct(codegen); - PointerType* tuple_ptr = PointerType::get(tuple_struct, 0); - LlvmCodeGen::FnPrototype prototype(codegen, "update_tuple", codegen->void_type()); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("agg_fn_ctxs", fn_ctx_ptr_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("is_merge", codegen->boolean_type())); - - LlvmCodeGen::LlvmBuilder builder(codegen->context()); - Value* args[5]; - Function* fn = prototype.generate_prototype(&builder, &args[0]); - - Value* agg_fn_ctxs_arg = args[1]; - Value* tuple_arg = args[2]; - Value* row_arg = args[3]; - - // Cast the parameter types to the internal llvm runtime types. - // TODO: get rid of this by using right type in function signature - tuple_arg = builder.CreateBitCast(tuple_arg, tuple_ptr, "tuple"); - - // Loop over each expr and generate the IR for that slot. If the expr is not - // count(*), generate a helper IR function to update the slot and call that. - j = _probe_expr_ctxs.size(); - for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { - // skip non-materialized slots; we don't have evaluators instantiated for those - while (!_intermediate_tuple_desc->slots()[j]->is_materialized()) { - DCHECK_LT(j, _intermediate_tuple_desc->slots().size() - 1); - ++j; - } - SlotDescriptor* slot_desc = _intermediate_tuple_desc->slots()[j]; - AggFnEvaluator* evaluator = _aggregate_evaluators[i]; - if (evaluator->is_count_star()) { - // TODO: we should be able to hoist this up to the loop over the batch and just - // increment the slot by the number of rows in the batch. - int field_idx = slot_desc->field_idx(); - Value* const_one = codegen->get_int_constant(TYPE_BIGINT, 1); - Value* slot_ptr = builder.create_struct_gep(tuple_arg, field_idx, "src_slot"); - Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val"); - Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc"); - builder.CreateStore(count_inc, slot_ptr); - } else { - Function* update_slot_fn = codegen_update_slot(evaluator, slot_desc); - if (update_slot_fn == NULL) return NULL; - Value* fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i); - Value* fn_ctx = builder.CreateLoad(fn_ctx_ptr, "fn_ctx"); - builder.CreateCall3(update_slot_fn, fn_ctx, tuple_arg, row_arg); - } - } - builder.CreateRetVoid(); - - // codegen_process_batch() does the final optimizations. - return codegen->FinalizeFunction(fn); -} - -Function* PartitionedAggregationNode::codegen_process_batch() { - LlvmCodeGen* codegen = NULL; - if (!_state->get_codegen(&codegen).ok()) { - return NULL; - } - SCOPED_TIMER(codegen->codegen_timer()); - - Function* update_tuple_fn = codegen_update_tuple(); - if (update_tuple_fn == NULL) { - return NULL; - } - - // Get the cross compiled update row batch function - IRFunction::Type ir_fn = (!_probe_expr_ctxs.empty() ? - IRFunction::PART_AGG_NODE_PROCESS_BATCH_FALSE : - IRFunction::PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING); - Function* process_batch_fn = codegen->get_function(ir_fn); - DCHECK(process_batch_fn != NULL); - - int replaced = 0; - if (!_probe_expr_ctxs.empty()) { - // Aggregation w/o grouping does not use a hash table. - - // Codegen for hash - // The codegen'd process_batch function is only used in open() with _level = 0, - // so don't use murmur hash - Function* hash_fn = _ht_ctx->codegen_hash_current_row(_state, /* use murmur */ false); - if (hash_fn == NULL) { - return NULL; - } - - // Codegen PartitionedHashTable::Equals - Function* equals_fn = _ht_ctx->codegen_equals(_state); - if (equals_fn == NULL) { - return NULL; - } - - // Codegen for evaluating probe rows - Function* eval_probe_row_fn = _ht_ctx->codegen_eval_row(_state, false); - if (eval_probe_row_fn == NULL) { - return NULL; - } - - // Replace call sites - process_batch_fn = codegen->replace_call_sites(process_batch_fn, false, - eval_probe_row_fn, "EvalProbeRow", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites(process_batch_fn, true, - hash_fn, "HashCurrentRow", &replaced); - DCHECK_EQ(replaced, 1); - - process_batch_fn = codegen->replace_call_sites(process_batch_fn, true, - equals_fn, "Equals", &replaced); - DCHECK_EQ(replaced, 1); - } - - process_batch_fn = codegen->replace_call_sites(process_batch_fn, false, - update_tuple_fn, "update_tuple", &replaced); - DCHECK_GE(replaced, 1); - DCHECK(process_batch_fn != NULL); - return codegen->optimize_function_with_exprs(process_batch_fn); -} #endif } diff --git a/be/src/exec/partitioned_aggregation_node.h b/be/src/exec/partitioned_aggregation_node.h index d963b8a74d..bbcc3e0e76 100644 --- a/be/src/exec/partitioned_aggregation_node.h +++ b/be/src/exec/partitioned_aggregation_node.h @@ -29,14 +29,9 @@ #include "runtime/mem_pool.h" #include "runtime/string_value.h" -namespace llvm { - class Function; -} - namespace doris { class AggFnEvaluator; -class LlvmCodeGen; class RowBatch; class RuntimeState; struct StringValue; @@ -109,8 +104,6 @@ public: // virtual void close(RuntimeState* state); virtual Status close(RuntimeState* state); - static const char* _s_llvm_class_name; - protected: // Frees local allocations from _aggregate_evaluators and agg_fn_ctxs // virtual Status QueryMaintenance(RuntimeState* state); @@ -453,20 +446,6 @@ private: void cleanup_hash_tbl(const std::vector& agg_fn_ctxs, PartitionedHashTable::Iterator it); - // Codegen UpdateSlot(). Returns NULL if codegen is unsuccessful. - // Assumes is_merge = false; - llvm::Function* codegen_update_slot(AggFnEvaluator* evaluator, SlotDescriptor* slot_desc); - - // Codegen update_tuple(). Returns NULL if codegen is unsuccessful. - llvm::Function* codegen_update_tuple(); - - // Codegen the process row batch loop. The loop has already been compiled to - // IR and loaded into the codegen object. UpdateAggTuple has also been - // codegen'd to IR. This function will modify the loop subsituting the statically - // compiled functions with codegen'd ones. - // Assumes AGGREGATED_ROWS = false. - llvm::Function* codegen_process_batch(); - // We need two buffers per partition, one for the aggregated stream and one // for the unaggregated stream. We need an additional buffer to read the stream // we are currently repartitioning. diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index e14bdd6b09..61d9d2e6e7 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -17,8 +17,6 @@ #include "exec/partitioned_hash_table.inline.h" -#include "codegen/codegen_anyval.h" -#include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "exprs/slot_ref.h" @@ -29,8 +27,6 @@ #include "runtime/string_value.hpp" #include "util/doris_metrics.h" -// using namespace llvm; - // DEFINE_bool(enable_quadratic_probing, true, "Enable quadratic probing hash table"); using std::string; @@ -38,20 +34,8 @@ using std::stringstream; using std::vector; using std::endl; -using llvm::BasicBlock; -using llvm::Value; -using llvm::Function; -using llvm::Type; -using llvm::PointerType; -using llvm::LLVMContext; -using llvm::PHINode; -using llvm::ConstantFP; -using llvm::APFloat; - namespace doris { -const char* PartitionedHashTableCtx::_s_llvm_class_name = "class.doris::PartitionedHashTableCtx"; - // Random primes to multiply the seed with. static uint32_t SEED_PRIMES[] = { 1, // First seed must be 1, level 0 is used by other operators in the fragment. @@ -452,494 +436,5 @@ string PartitionedHashTable::print_stats() const { return ss.str(); } -#if 0 -// Helper function to store a value into the results buffer if the expr -// evaluated to NULL. We don't want (NULL, 1) to hash to the same as (0,1) so -// we'll pick a more random value. -static void codegen_assign_null_value(LlvmCodeGen* codegen, - // LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) { - LlvmCodeGen::LlvmBuilder* builder, Value* dst, const TypeDescriptor& type) { - int64_t fvn_seed = HashUtil::FNV_SEED; - - // if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) { - if (type.type == TYPE_VARCHAR) { - Value* dst_ptr = builder->CreateStructGEP(dst, 0, "string_ptr"); - Value* dst_len = builder->CreateStructGEP(dst, 1, "string_len"); - Value* null_len = codegen->get_int_constant(TYPE_INT, fvn_seed); - Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type()); - builder->CreateStore(null_ptr, dst_ptr); - builder->CreateStore(null_len, dst_len); - } else { - Value* null_value = NULL; - // Get a type specific representation of fvn_seed - switch (type.type) { - case TYPE_BOOLEAN: - // In results, booleans are stored as 1 byte - dst = builder->CreateBitCast(dst, codegen->ptr_type()); - null_value = codegen->get_int_constant(TYPE_TINYINT, fvn_seed); - break; - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - null_value = codegen->get_int_constant(type.type, fvn_seed); - break; - case TYPE_FLOAT: { - // Don't care about the value, just the bit pattern - float fvn_seed_float = *reinterpret_cast(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float)); - break; - } - case TYPE_DOUBLE: { - // Don't care about the value, just the bit pattern - double fvn_seed_double = *reinterpret_cast(&fvn_seed); - null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double)); - break; - } - default: - DCHECK(false); - } - builder->CreateStore(null_value, dst); - } -} - -// Codegen for evaluating a tuple row over either _build_expr_ctxs or _probe_expr_ctxs. -// For the case where we are joining on a single int, the IR looks like -// define i1 @EvalBuildRow(%"class.impala::PartitionedHashTableCtx"* %this_ptr, -// %"class.impala::TupleRow"* %row) #20 { -// entry: -// %result = call i64 @GetSlotRef1(%"class.impala::ExprContext"* inttoptr -// (i64 67971664 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %is_null = trunc i64 %result to i1 -// %0 = zext i1 %is_null to i8 -// store i8 %0, i8* inttoptr (i64 95753144 to i8*) -// br i1 %is_null, label %null, label %not_null -// -// null: ; preds = %entry -// store i32 -2128831035, i32* inttoptr (i64 95753128 to i32*) -// br label %continue -// -// not_null: ; preds = %entry -// %1 = ashr i64 %result, 32 -// %2 = trunc i64 %1 to i32 -// store i32 %2, i32* inttoptr (i64 95753128 to i32*) -// br label %continue -// -// continue: ; preds = %not_null, %null -// ret i1 true -// } -// For each expr, we create 3 code blocks. The null, not null and continue blocks. -// Both the null and not null branch into the continue block. The continue block -// becomes the start of the next block for codegen (either the next expr or just the -// end of the function). -Function* PartitionedHashTableCtx::codegen_eval_row(RuntimeState* state, bool build) { - // TODO: codegen_assign_null_value() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet - const vector& ctxs = build ? _build_expr_ctxs : _probe_expr_ctxs; - for (int i = 0; i < ctxs.size(); ++i) { - PrimitiveType type = ctxs[i]->root()->type().type; - // if (type == TYPE_TIMESTAMP || type == TYPE_DECIMAL || type == TYPE_CHAR) { - if (type == TYPE_DATETIME || type == TYPE_DATE - || type == TYPE_DECIMAL || type == TYPE_CHAR) { - return NULL; - } - } - - LlvmCodeGen* codegen; - if (!state->get_codegen(&codegen).ok()) { - return NULL; - } - - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(PartitionedHashTableCtx::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow", - codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - - Value* row = args[1]; - Value* has_null = codegen->false_value(); - - for (int i = 0; i < ctxs.size(); ++i) { - // TODO: refactor this to somewhere else? This is not hash table specific except for - // the null handling bit and would be used for anyone that needs to materialize a - // vector of exprs - // Convert result buffer to llvm ptr type - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ctxs[i]->root()->type()), loc); - - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // Call expr - Function* expr_fn; - Status status = ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - VLOG_QUERY << "Problem with codegen_eval_row: " << status.get_error_msg(); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped( - codegen, &builder, ctxs[i]->root()->type(), expr_fn, expr_fn_args, "result"); - Value* is_null = result.get_is_null(); - - // Set null-byte result - Value* null_byte = builder.CreateZExt(is_null, codegen->get_type(TYPE_TINYINT)); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - builder.CreateStore(null_byte, llvm_null_byte_loc); - - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - if (!_stores_nulls) { - // hash table doesn't store nulls, no reason to keep evaluating exprs - builder.CreateRet(codegen->true_value()); - } else { - codegen_assign_null_value(codegen, &builder, llvm_loc, ctxs[i]->root()->type()); - builder.CreateBr(continue_block); - } - - // Not null block - builder.SetInsertPoint(not_null_block); - result.ToNativePtr(llvm_loc); - builder.CreateBr(continue_block); - - // Continue block - builder.SetInsertPoint(continue_block); - if (_stores_nulls) { - // Update has_null - PHINode* is_null_phi = builder.CreatePHI(codegen->boolean_type(), 2, "is_null_phi"); - is_null_phi->addIncoming(codegen->true_value(), null_block); - is_null_phi->addIncoming(codegen->false_value(), not_null_block); - has_null = builder.CreateOr(has_null, is_null_phi, "has_null"); - } - } - builder.CreateRet(has_null); - - return codegen->FinalizeFunction(fn); -} - -// Codegen for hashing the current row. In the case with both string and non-string data -// (group by int_col, string_col), the IR looks like: -// define i32 @HashCurrentRow(%"class.impala::PartitionedHashTableCtx"* %this_ptr) #20 { -// entry: -// %seed = call i32 @get_hash_seed(%"class.impala::PartitionedHashTableCtx"* %this_ptr) -// %0 = call i32 @CrcHash16(i8* inttoptr (i64 119151296 to i8*), i32 16, i32 %seed) -// %1 = load i8* inttoptr (i64 119943721 to i8*) -// %2 = icmp ne i8 %1, 0 -// br i1 %2, label %null, label %not_null -// -// null: ; preds = %entry -// %3 = call i32 @CrcHash161(i8* inttoptr (i64 119151312 to i8*), i32 16, i32 %0) -// br label %continue -// -// not_null: ; preds = %entry -// %4 = load i8** getelementptr inbounds (%"struct.impala::StringValue"* inttoptr -// (i64 119151312 to %"struct.impala::StringValue"*), i32 0, i32 0) -// %5 = load i32* getelementptr inbounds (%"struct.impala::StringValue"* inttoptr -// (i64 119151312 to %"struct.impala::StringValue"*), i32 0, i32 1) -// %6 = call i32 @IrCrcHash(i8* %4, i32 %5, i32 %0) -// br label %continue -// -// continue: ; preds = %not_null, %null -// %7 = phi i32 [ %6, %not_null ], [ %3, %null ] -// call void @set_hash(%"class.impala::PartitionedHashTableCtx"* %this_ptr, i32 %7) -// ret i32 %7 -// } -Function* PartitionedHashTableCtx::codegen_hash_current_row(RuntimeState* state, bool use_murmur) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) return NULL; - } - - LlvmCodeGen* codegen; - if (!state->get_codegen(&codegen).ok()) return NULL; - - // Get types to generate function prototype - Type* this_type = codegen->get_type(PartitionedHashTableCtx::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, - (use_murmur ? "MurmurHashCurrentRow" : "HashCurrentRow"), - codegen->get_type(TYPE_INT)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* this_arg; - Function* fn = prototype.generate_prototype(&builder, &this_arg); - - // Call get_hash_seed() to get _seeds[_level] - Function* get_hash_seed_fn = codegen->GetFunction(IRFunction::HASH_TABLE_GET_HASH_SEED); - Value* seed = builder.CreateCall(get_hash_seed_fn, this_arg, "seed"); - - Value* hash_result = seed; - Value* data = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), _expr_values_buffer); - if (_var_result_begin == -1) { - // No variable length slots, just hash what is in '_expr_values_buffer' - if (_results_buffer_size > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(_results_buffer_size) : - codegen->GetHashFunction(_results_buffer_size); - Value* len = codegen->get_int_constant(TYPE_INT, _results_buffer_size); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result, "hash"); - } - } else { - if (_var_result_begin > 0) { - Function* hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(_var_result_begin) : - codegen->GetHashFunction(_var_result_begin); - Value* len = codegen->get_int_constant(TYPE_INT, _var_result_begin); - hash_result = builder.CreateCall3(hash_fn, data, len, hash_result, "hash"); - } - - // Hash string slots - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // if (_build_expr_ctxs[i]->root()->type().type != TYPE_STRING - // && _build_expr_ctxs[i]->root()->type().type != TYPE_VARCHAR) continue; - if (_build_expr_ctxs[i]->root()->type().type != TYPE_VARCHAR) { - continue; - } - - BasicBlock* null_block = NULL; - BasicBlock* not_null_block = NULL; - BasicBlock* continue_block = NULL; - Value* str_null_result = NULL; - - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - - // If the hash table stores nulls, we need to check if the stringval - // evaluated to NULL - if (_stores_nulls) { - null_block = BasicBlock::Create(context, "null", fn); - not_null_block = BasicBlock::Create(context, "not_null", fn); - continue_block = BasicBlock::Create(context, "continue", fn); - - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte"); - Value* is_null = builder.CreateICmpNE(null_byte, - codegen->get_int_constant(TYPE_TINYINT, 0), "is_null"); - builder.CreateCondBr(is_null, null_block, not_null_block); - - // For null, we just want to call the hash function on the portion of - // the data - builder.SetInsertPoint(null_block); - Function* null_hash_fn = use_murmur ? - codegen->GetMurmurHashFunction(sizeof(StringValue)) : - codegen->GetHashFunction(sizeof(StringValue)); - Value* llvm_loc = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), loc); - Value* len = codegen->get_int_constant(TYPE_INT, sizeof(StringValue)); - str_null_result = - builder.CreateCall3(null_hash_fn, llvm_loc, len, hash_result, "str_null"); - builder.CreateBr(continue_block); - - builder.SetInsertPoint(not_null_block); - } - - // Convert _expr_values_buffer loc to llvm value - // Value* str_val = codegen->cast_ptr_to_llvm_ptr(codegen->get_ptr_type(TYPE_STRING), loc); - Value* str_val = codegen->cast_ptr_to_llvm_ptr(codegen->get_ptr_type(TYPE_VARCHAR), loc); - - Value* ptr = builder.CreateStructGEP(str_val, 0); - Value* len = builder.CreateStructGEP(str_val, 1); - ptr = builder.CreateLoad(ptr, "ptr"); - len = builder.CreateLoad(len, "len"); - - // Call hash(ptr, len, hash_result); - Function* general_hash_fn = use_murmur ? codegen->GetMurmurHashFunction() : - codegen->GetHashFunction(); - Value* string_hash_result = - builder.CreateCall3(general_hash_fn, ptr, len, hash_result, "string_hash"); - - if (_stores_nulls) { - builder.CreateBr(continue_block); - builder.SetInsertPoint(continue_block); - // Use phi node to reconcile that we could have come from the string-null - // path and string not null paths. - PHINode* phi_node = builder.CreatePHI(codegen->get_type(TYPE_INT), 2, "hash_phi"); - phi_node->addIncoming(string_hash_result, not_null_block); - phi_node->addIncoming(str_null_result, null_block); - hash_result = phi_node; - } else { - hash_result = string_hash_result; - } - } - } - - builder.CreateRet(hash_result); - return codegen->FinalizeFunction(fn); -} - -// Codegen for PartitionedHashTableCtx::equals. For a hash table with two exprs (string,int), -// the IR looks like: -// -// define i1 @equals(%"class.impala::PartitionedHashTableCtx"* %this_ptr, -// %"class.impala::TupleRow"* %row) { -// entry: -// %result = call i64 @GetSlotRef(%"class.impala::ExprContext"* inttoptr -// (i64 146381856 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %0 = trunc i64 %result to i1 -// br i1 %0, label %null, label %not_null -// -// false_block: ; preds = %not_null2, %null1, %not_null, %null -// ret i1 false -// -// null: ; preds = %entry -// br i1 false, label %continue, label %false_block -// -// not_null: ; preds = %entry -// %1 = load i32* inttoptr (i64 104774368 to i32*) -// %2 = ashr i64 %result, 32 -// %3 = trunc i64 %2 to i32 -// %cmp_raw = icmp eq i32 %3, %1 -// br i1 %cmp_raw, label %continue, label %false_block -// -// continue: ; preds = %not_null, %null -// %result4 = call { i64, i8* } @GetSlotRef1( -// %"class.impala::ExprContext"* inttoptr -// (i64 146381696 to %"class.impala::ExprContext"*), -// %"class.impala::TupleRow"* %row) -// %4 = extractvalue { i64, i8* } %result4, 0 -// %5 = trunc i64 %4 to i1 -// br i1 %5, label %null1, label %not_null2 -// -// null1: ; preds = %continue -// br i1 false, label %continue3, label %false_block -// -// not_null2: ; preds = %continue -// %6 = extractvalue { i64, i8* } %result4, 0 -// %7 = ashr i64 %6, 32 -// %8 = trunc i64 %7 to i32 -// %result5 = extractvalue { i64, i8* } %result4, 1 -// %cmp_raw6 = call i1 @_Z11StringValEQPciPKN6impala11StringValueE( -// i8* %result5, i32 %8, %"struct.impala::StringValue"* inttoptr -// (i64 104774384 to %"struct.impala::StringValue"*)) -// br i1 %cmp_raw6, label %continue3, label %false_block -// -// continue3: ; preds = %not_null2, %null1 -// ret i1 true -// } -Function* PartitionedHashTableCtx::codegen_equals(RuntimeState* state) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // Disable codegen for CHAR - if (_build_expr_ctxs[i]->root()->type().type == TYPE_CHAR) return NULL; - } - - LlvmCodeGen* codegen; - if (!state->get_codegen(&codegen).ok()) return NULL; - // Get types to generate function prototype - Type* tuple_row_type = codegen->get_type(TupleRow::_s_llvm_class_name); - DCHECK(tuple_row_type != NULL); - PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0); - - Type* this_type = codegen->get_type(PartitionedHashTableCtx::_s_llvm_class_name); - DCHECK(this_type != NULL); - PointerType* this_ptr_type = PointerType::get(this_type, 0); - - LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->get_type(TYPE_BOOLEAN)); - prototype.add_argument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type)); - prototype.add_argument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type)); - - LLVMContext& context = codegen->context(); - LlvmCodeGen::LlvmBuilder builder(context); - Value* args[2]; - Function* fn = prototype.generate_prototype(&builder, args); - Value* row = args[1]; - - BasicBlock* false_block = BasicBlock::Create(context, "false_block", fn); - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - BasicBlock* null_block = BasicBlock::Create(context, "null", fn); - BasicBlock* not_null_block = BasicBlock::Create(context, "not_null", fn); - BasicBlock* continue_block = BasicBlock::Create(context, "continue", fn); - - // call get_value on build_exprs[i] - Function* expr_fn; - Status status = _build_expr_ctxs[i]->root()->get_codegend_compute_fn(state, &expr_fn); - if (!status.ok()) { - // VLOG_QUERY << "Problem with codegen_equals: " << status.GetDetail(); - VLOG_QUERY << "Problem with codegen_equals: " << status.get_error_msg(); - fn->eraseFromParent(); // deletes function - return NULL; - } - - Value* ctx_arg = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(ExprContext::_s_llvm_class_name), _build_expr_ctxs[i]); - Value* expr_fn_args[] = { ctx_arg, row }; - CodegenAnyVal result = CodegenAnyVal::create_call_wrapped(codegen, &builder, - _build_expr_ctxs[i]->root()->type(), expr_fn, expr_fn_args, "result"); - Value* is_null = result.get_is_null(); - - // Determine if probe is null (i.e. _expr_value_null_bits[i] == true). In - // the case where the hash table does not store nulls, this is always false. - Value* probe_is_null = codegen->false_value(); - uint8_t* null_byte_loc = &_expr_value_null_bits[i]; - if (_stores_nulls) { - Value* llvm_null_byte_loc = - codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(), null_byte_loc); - Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); - probe_is_null = builder.CreateICmpNE(null_byte, - codegen->get_int_constant(TYPE_TINYINT, 0)); - } - - // Get llvm value for probe_val from '_expr_values_buffer' - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - Value* probe_val = codegen->cast_ptr_to_llvm_ptr( - codegen->get_ptr_type(_build_expr_ctxs[i]->root()->type()), loc); - - // Branch for get_value() returning NULL - builder.CreateCondBr(is_null, null_block, not_null_block); - - // Null block - builder.SetInsertPoint(null_block); - builder.CreateCondBr(probe_is_null, continue_block, false_block); - - // Not-null block - builder.SetInsertPoint(not_null_block); - if (_stores_nulls) { - BasicBlock* cmp_block = BasicBlock::Create(context, "cmp", fn); - // First need to compare that probe expr[i] is not null - builder.CreateCondBr(probe_is_null, false_block, cmp_block); - builder.SetInsertPoint(cmp_block); - } - // Check result == probe_val - Value* is_equal = result.EqToNativePtr(probe_val); - builder.CreateCondBr(is_equal, continue_block, false_block); - - builder.SetInsertPoint(continue_block); - } - builder.CreateRet(codegen->true_value()); - - builder.SetInsertPoint(false_block); - builder.CreateRet(codegen->false_value()); - - return codegen->FinalizeFunction(fn); -} -#endif - } // namespace doris diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h index 3c5b142418..94f6c9847f 100644 --- a/be/src/exec/partitioned_hash_table.h +++ b/be/src/exec/partitioned_hash_table.h @@ -33,15 +33,10 @@ #include "util/hash_util.hpp" #include "util/bit_util.h" -namespace llvm { - class Function; -} - namespace doris { class Expr; class ExprContext; -class LlvmCodeGen; class MemTracker; class MemTracker; class RowDescriptor; @@ -156,24 +151,6 @@ public: int results_buffer_size() const { return _results_buffer_size; } - // Codegen for evaluating a tuple row. Codegen'd function matches the signature - // for EvalBuildRow and EvalTupleRow. - // If build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs. - llvm::Function* codegen_eval_row(RuntimeState* state, bool build_row); - - // Codegen for evaluating a TupleRow and comparing equality against - // '_expr_values_buffer'. Function signature matches PartitionedHashTable::equals(). - llvm::Function* codegen_equals(RuntimeState* state); - - // Codegen for hashing the expr values in '_expr_values_buffer'. Function prototype - // matches HashCurrentRow identically. Unlike HashCurrentRow(), the returned function - // only uses a single hash function, rather than switching based on _level. - // If 'use_murmur' is true, murmur hash is used, otherwise CRC is used if the hardware - // supports it (see hash-util.h). - llvm::Function* codegen_hash_current_row(RuntimeState* state, bool use_murmur); - - static const char* _s_llvm_class_name; - private: friend class PartitionedHashTable; friend class PartitionedHashTableTest_HashEmpty_Test; diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp index e1820b299c..e83346904e 100644 --- a/be/src/exec/topn_node.cpp +++ b/be/src/exec/topn_node.cpp @@ -70,13 +70,6 @@ Status TopNNode::prepare(RuntimeState* state) { _tuple_row_less_than.reset( new TupleRowComparator(_sort_exec_exprs, _is_asc_order, _nulls_first)); - if (state->codegen_level() > 0) { - bool success = _tuple_row_less_than->codegen(state); - if (success) { - // AddRuntimeExecOption("Codegen Enabled"); - } - } - _abort_on_default_limit_exceeded = _abort_on_default_limit_exceeded && state->abort_on_default_limit_exceeded(); _materialized_tuple_desc = _row_descriptor.tuple_descriptors()[0]; diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index 2b57e8c0da..86ff768be3 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -17,7 +17,6 @@ #include "exec/union_node.h" -#include "codegen/llvm_codegen.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "runtime/row_batch.h" @@ -30,8 +29,6 @@ // #include "common/names.h" -using namespace llvm; - namespace doris { UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, @@ -96,52 +93,6 @@ Status UnionNode::prepare(RuntimeState* state) { return Status::OK(); } -void UnionNode::codegen(RuntimeState* state) { -#if 0 - DCHECK(state->ShouldCodegen()); - ExecNode::codegen(state); - if (IsNodeCodegenDisabled()) return; - - LlvmCodeGen* codegen = state->codegen(); - DCHECK(codegen != nullptr); - std::stringstream codegen_message; - Status codegen_status; - for (int i = 0; i < _child_expr_lists.size(); ++i) { - if (is_child_passthrough(i)) continue; - - llvm::Function* tuple_materialize_exprs_fn; - codegen_status = Tuple::CodegenMaterializeExprs(codegen, false, *_tuple_desc, - _child_expr_lists[i], true, &tuple_materialize_exprs_fn); - if (!codegen_status.ok()) { - // Codegen may fail in some corner cases (e.g. we don't handle TYPE_CHAR). If this - // happens, abort codegen for this and the remaining children. - codegen_message << "Codegen failed for child: " << _children[i]->id(); - break; - } - - // Get a copy of the function. This function will be modified and added to the - // vector of functions. - Function* union_materialize_batch_fn = - codegen->GetFunction(IRFunction::UNION_MATERIALIZE_BATCH, true); - DCHECK(union_materialize_batch_fn != nullptr); - - int replaced = codegen->ReplaceCallSites(union_materialize_batch_fn, - tuple_materialize_exprs_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL); - DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(union_materialize_batch_fn); - - union_materialize_batch_fn = codegen->FinalizeFunction( - union_materialize_batch_fn); - DCHECK(union_materialize_batch_fn != nullptr); - - // Add the function to Jit and to the vector of codegened functions. - codegen->AddFunctionToJit(union_materialize_batch_fn, - reinterpret_cast(&(_codegend_union_materialize_batch_fns.data()[i]))); - } - runtime_profile()->AddCodegenMsg( - codegen_status.ok(), codegen_status, codegen_message.str()); -#endif -} - Status UnionNode::open(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::open(state)); diff --git a/be/src/exec/union_node.h b/be/src/exec/union_node.h index b12fb597a1..70f65d1e05 100644 --- a/be/src/exec/union_node.h +++ b/be/src/exec/union_node.h @@ -45,7 +45,6 @@ public: virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); virtual Status prepare(RuntimeState* state); - virtual void codegen(RuntimeState* state); virtual Status open(RuntimeState* state); virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); // virtual Status reset(RuntimeState* state);