/*
 * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 *
 * openGauss is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 * ---------------------------------------------------------------------------------------
 *
 * vechashaggcodegen.cpp
 *        Routines to handle vector hashagg nodes. We only focus on
 *        the CPU intensive part of hashagg operation.
 * IDENTIFICATION
 *        src/gausskernel/runtime/codegen/vecexecutor/vechashaggcodegen.cpp
 *
 * ---------------------------------------------------------------------------------------
 */
#include "codegen/gscodegen.h"
#include "codegen/vechashaggcodegen.h"
#include "codegen/vecsortcodegen.h"
#include "codegen/codegendebuger.h"
#include "codegen/builtinscodegen.h"
#include "codegen/vecexprcodegen.h"

#include "catalog/pg_operator.h"
#include "pgxc/pgxc.h"
#include "vecexecutor/vecexecutor.h"

/* Helpers defined outside this section; only their prototypes are visible here. */
void WrapAllocHashSlot(HashAggRunner* haRunner, VectorBatch* batch, int idx, int keysimple);
void WrapSglTblAllocHashSlot(HashAggRunner* haRunner, VectorBatch* batch, int idx, int keysimple);
void WrapResetExprContext(ExprContext* econtext);

/* macro values used for prefetch in batchagg */
#define PREFETCH_AGGHASHING_DISTANCE1 4
#define PREFETCH_AGGHASHING_DISTANCE2 2
#define PREFETCH_BATCHAGGREGATION_DISTANCE 2

/* threshold on (output rows / input rows) above which prefetching is generated */
#define AGG_ECONOMY_RATION 0.001

namespace dorado {
llvm::Function* prefetchAggHashingCodeGen();
llvm::Function* prefetchBatchAggregationCodeGen();
llvm::Function* prefetchAggSglTblHashingCodeGen();

/*
 * GetAlignedScale
 * @Description : Compute the result scale of a numeric expression tree made
 *                of TargetEntry, OpExpr (+, -, *), Var and Const nodes.
 *                For + and - the larger operand scale is used, for * the
 *                operand scales are added.
 * @in node     : expression node to inspect.
 * @return      : the scale of the expression result.
 * @Notes       : Reports an ERROR for any other operator; any other node
 *                type trips Assert(0) and yields 0 in non-assert builds.
 */
int VecHashAggCodeGen::GetAlignedScale(Expr* node)
{
    int resscale = 0;

    /*
     * Get into this function only when fast_aggref is true, which means
     * we could apply fast codegen path for this expression node.
     */
    switch (nodeTag(node)) {
        case T_TargetEntry: {
            TargetEntry* tentry = (TargetEntry*)node;
            resscale = GetAlignedScale(tentry->expr);
        } break;
        case T_OpExpr: {
            OpExpr* opexpr = (OpExpr*)node;
            Expr* lexpr = (Expr*)linitial(opexpr->args);
            Expr* rexpr = (Expr*)lsecond(opexpr->args);
            int lscale = GetAlignedScale(lexpr);
            int rscale = GetAlignedScale(rexpr);

            /*
             * Only when AggRefFastJittable is satisfied, we evaluate the scale
             * of the aggref expression. So, no need to consider NUMERICDIVOID.
             */
            if (opexpr->opno == NUMERICADDOID || opexpr->opno == NUMERICSUBOID) {
                if (lscale < rscale)
                    resscale = rscale;
                else
                    resscale = lscale;
            } else if (opexpr->opno == NUMERICMULOID)
                resscale = lscale + rscale;
            else
                ereport(ERROR,
                    (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
                        errmodule(MOD_LLVM),
                        errmsg("Unsupported operation %u in FastAgg.", opexpr->opno)));
        } break;
        case T_Var: {
            Var* var = (Var*)node;
            Assert(var->vartype == NUMERICOID);
            /* the scale is taken from the low bits of the type modifier */
            resscale = (var->vartypmod - VARHDRSZ) & NUMERIC_BI_SCALEMASK;
        } break;
        case T_Const: {
            Const* cst = (Const*)node;
            Assert(cst->consttype == NUMERICOID);

            /* Extract the real value of this const */
            ScalarValue val = ScalarVector::DatumToScalar(cst->constvalue, cst->consttype, cst->constisnull);
            resscale = NUMERIC_BI_SCALE((Numeric)val);
        } break;
        default:
            Assert(0);
            break;
    }

    return resscale;
}

/*
 * AggRefJittable
 * @Description : Check whether the argument expression of an aggregate can
 *                be code generated, and on success record the codegen
 *                routine to use in state->exprCodeGen for Var, Const and
 *                OpExpr nodes.
 * @in state    : expression state to inspect (checked recursively).
 * @return      : true if the whole expression tree is supported.
 * @Notes       : Supported: int8/numeric Var (non-system column, numeric
 *                with an explicit typmod), int8/numeric Const and binary
 *                +, -, *, / on int8 or numeric.
 */
bool VecHashAggCodeGen::AggRefJittable(ExprState* state)
{
    Expr* node = state->expr;

    switch (nodeTag(node)) {
        /*
         * there always be a targetentry in AggrefExprState, so extract
         * targetentry first.
         */
        case T_TargetEntry: {
            GenericExprState* gstate = (GenericExprState*)state;
            ExprState* tstate = gstate->arg;
            if (!AggRefJittable(tstate))
                return false;
        } break;
        case T_Var: {
            Var* var = (Var*)state->expr;

            /* If var is in sysattrlist, we do not codegen the var expr. */
            if (var->varattno < 0)
                return false;

            /* only support int8 and numeric type */
            switch (var->vartype) {
                case INT8OID:
                case NUMERICOID:
                    break;
                default:
                    return false;
            }

            if (var->vartype == NUMERICOID) {
                /*
                 * do not consider numeric data type without precision
                 * specified, since it changed during evaluation.
                 */
                if (var->vartypmod == -1)
                    return false;
            }

            state->exprCodeGen = (exprFakeCodeGenSig)&VecExprCodeGen::VarCodeGen;
        } break;
        case T_Const: {
            Const* con = (Const*)state->expr;
            switch (con->consttype) {
                /* only support int8 and numeric type */
                case INT8OID:
                case NUMERICOID:
                    break;
                default:
                    return false;
            }
            state->exprCodeGen = (exprFakeCodeGenSig)&VecExprCodeGen::ConstCodeGen;
        } break;
        case T_OpExpr: {
            /* check if have special opexpr with one arg */
            OpExpr* opexpr = (OpExpr*)state->expr;
            FuncExprState* fstate = (FuncExprState*)state;
            if (list_length(opexpr->args) == 1)
                return false;

            switch (opexpr->opno) {
                /*
                 * only support limited math operations
                 */
                case INT8PLOID:
                case INT8MIOID:
                case INT8MULOID:
                case INT8DIVOID:
                case NUMERICADDOID:
                case NUMERICSUBOID:
                case NUMERICMULOID:
                case NUMERICDIVOID:
                    break;
                default:
                    return false;
            }

            /*
             * now consider the args of this operation expression.
             */
            ExprState* lestate = (ExprState*)linitial(fstate->args);
            ExprState* restate = (ExprState*)lsecond(fstate->args);
            if (!AggRefJittable(lestate) || !AggRefJittable(restate))
                return false;

            state->exprCodeGen = (exprFakeCodeGenSig)&VecExprCodeGen::OpCodeGen;
        } break;
        default:
            return false;
    }

    return true;
}

/*
 * AggRefFastJittable
 * @Description : Check whether the argument expression of an aggregate
 *                qualifies for the fast numeric path: numeric Var whose
 *                precision (upper 16 bits of the typmod) is at most 18,
 *                non-null numeric Const stored in the NUMERIC_64 form, and
 *                binary +, -, * on such operands.
 * @in state    : expression state to inspect (checked recursively).
 * @return      : true if the whole expression tree qualifies.
 * @Notes       : Any other node type trips Assert(0) and returns false.
 */
bool VecHashAggCodeGen::AggRefFastJittable(ExprState* state)
{
    Expr* node = state->expr;

    switch (nodeTag(node)) {
        case T_TargetEntry: {
            GenericExprState* gstate = (GenericExprState*)state;
            ExprState* tstate = (ExprState*)(gstate->arg);
            if (!AggRefFastJittable(tstate))
                return false;
        } break;
        case T_Var: {
            Var* var = (Var*)node;
            if (var->vartype != NUMERICOID || var->vartypmod == -1)
                return false;

            /* get the precision of this attribute column */
            int prec = (var->vartypmod >> 16) & 0xFFFF;
            if (prec > 18)
                return false;
        } break;
        case T_Const: {
            Const* cst = (Const*)(state->expr);

            /* only consider not null numeric type const value */
            if (cst->consttype != NUMERICOID || cst->constisnull)
                return false;

            /*
             * The const is known to be not null here, so its header can be
             * inspected directly (the former second null test was redundant).
             */
            ScalarValue val = ScalarVector::DatumToScalar(cst->constvalue, cst->consttype, cst->constisnull);
            if ((((NumericData*)val)->choice.n_header & NUMERIC_BI_MASK) != NUMERIC_64)
                return false;
        } break;
        case T_OpExpr: {
            /* check if have special opexpr with one arg */
            OpExpr* opexpr = (OpExpr*)state->expr;
            FuncExprState* fstate = (FuncExprState*)state;
            if (list_length(opexpr->args) == 1)
                return false;

            /* only support +, -, * operation in numeric expression */
            switch (opexpr->opno) {
                case NUMERICADDOID:
                case NUMERICSUBOID:
                case NUMERICMULOID:
                    break;
                default:
                    return false;
            }

            ExprState* lestate = (ExprState*)linitial(fstate->args);
            ExprState* restate = (ExprState*)lsecond(fstate->args);
            if (!AggRefFastJittable(lestate) || !AggRefFastJittable(restate))
                return false;
        } break;
        default:
            Assert(0);
            return false;
    }

    return true;
}

/*
 * AgghashingJittable
 * @Description : Check whether the hashing / key matching part of a hash
 *                aggregation can be code generated.
 * @in node     : vector aggregation state.
 * @return      : true only if codegen is enabled, we are not on a
 *                coordinator, the strategy is AGG_HASHED and every group
 *                key and its hash function is supported.
 */
bool VecHashAggCodeGen::AgghashingJittable(VecAggState* node)
{
    VecAgg* vecagg = (VecAgg*)(node->ss.ps.plan);

    if (!u_sess->attr.attr_sql.enable_codegen || IS_PGXC_COORDINATOR)
        return false;

    /* Only support hash aggregation */
    if (vecagg->aggstrategy != AGG_HASHED)
        return false;

    /*
     * Get the input variable from the outer plan, and we only
     * support char, varchar, text, int4, int8 and timestamp(date).
     */
    List* tlist = (outerPlan(vecagg))->targetlist;
    int numkeys = vecagg->numCols;
    AttrNumber* keyIdx = vecagg->grpColIdx;

    for (int i = 0; i < numkeys; i++) {
        /* grpColIdx is 1-based, list_nth is 0-based */
        AttrNumber key = keyIdx[i] - 1;
        TargetEntry* tle = (TargetEntry*)list_nth(tlist, key);

        /* only support variable expr (or substr over a variable) */
        switch (nodeTag(tle->expr)) {
            case T_Var: {
                Var* var = (Var*)(tle->expr);
                switch (var->vartype) {
                    case BPCHAROID: {
                        int len = var->vartypmod - VARHDRSZ;

                        /* no codegeneration for unknown length */
                        if (len < 0)
                            return false;
                    } break;
                    case INT4OID:
                    case INT8OID:
                    case TIMESTAMPOID:
                    case DATEOID:
                    case TEXTOID:
                    case VARCHAROID:
                        break;
                    default:
                        return false;
                }
            } break;
            case T_FuncExpr: {
                FuncExpr* funcexpr = (FuncExpr*)(tle->expr);
                if (funcexpr->funcid != SUBSTRFUNCOID)
                    return false;

                /* Only support ASCII and UTF-8 encoding */
                int current_encoding = GetDatabaseEncoding();
                if (current_encoding != PG_SQL_ASCII && current_encoding != PG_UTF8)
                    return false;

                /* first arg must be a (relabeled) Var, the other two must be constants */
                List* func_args = funcexpr->args;
                Expr* aexpr1 = (Expr*)linitial(func_args);
                Expr* aexpr2 = (Expr*)lsecond(func_args);
                Expr* aexpr3 = (Expr*)lthird(func_args);
                if (!IsA(aexpr1, RelabelType) && !IsA(aexpr1, Var))
                    return false;
                if (!IsA(aexpr2, Const) || !IsA(aexpr3, Const))
                    return false;
            } break;
            default:
                return false;
        }

        /* only codegen hashint4, hashint8, hashtext,
         * timestamp_hash, and hashbpchar functions
         */
        Oid fnoid = node->hashfunctions[i].fn_oid;
        switch (fnoid) {
            case HASHINT4OID:
            case HASHINT8OID:
            case HASHBPCHAROID:
            case HASHTEXTOID:
                break;
            case TIMESTAMPHASHOID: {
                /* timestamp hashing is accepted only when HAVE_INT64_TIMESTAMP is defined */
#ifdef HAVE_INT64_TIMESTAMP
#else
                return false;
#endif
            } break;
            default:
                return false;
        }
    }

    return true;
}

/*
 * BatchAggJittable
 * @Description : Check whether the batch aggregation part of a hash
 *                aggregation can be code generated.
 * @in node     : vector aggregation state.
 * @in isSonic  : when true, int8 sum/avg are rejected.
 * @return      : true only if codegen is enabled, we are not on a
 *                coordinator, the strategy is AGG_HASHED, there is at least
 *                one aggregate and every aggregate is supported.
 */
bool VecHashAggCodeGen::BatchAggJittable(VecAggState* node, bool isSonic)
{
    int i = 0;
    VecAgg* vecagg = (VecAgg*)(node->ss.ps.plan);
    AggStatePerAgg peragg = node->peragg;

    if (!u_sess->attr.attr_sql.enable_codegen || IS_PGXC_COORDINATOR)
        return false;

    /* only support hashagg */
    if (vecagg->aggstrategy != AGG_HASHED)
        return false;

    /* if no agg funcs exists, no codegen is needed */
    if (0 == node->numaggs)
        return false;

    for (i = 0; i < node->numaggs; i++) {
        AggrefExprState* aggexprstate = peragg[i].aggrefstate;
        Aggref* aggref = peragg[i].aggref;

        /* only support sum/avg on int8 (non-sonic only) and numeric, plus count */
        switch (aggref->aggfnoid) {
            case INT8AVGFUNCOID:
            case INT8SUMFUNCOID:
                if (isSonic)
                    return false;
                break;
            case NUMERICAVGFUNCOID:
            case NUMERICSUMFUNCOID:
            case COUNTOID:
                break;
            default:
                return false;
        }

        /* count(*) has no args */
        if (aggref->aggfnoid == COUNTOID)
            continue;

        ExprState* estate = (ExprState*)linitial(aggexprstate->args);

        /* We only support simple expression cases */
        if (!AggRefJittable(estate))
            return false;
    }

    return true;
}

/*
 * HashAggCodeGen
 * @Description : Entry point of code generation for a hash aggregation
 *                node. Generates the hashing functions (segmented and
 *                single table variants), the batch aggregation function,
 *                the sort-agg match key function and the target list
 *                function, and registers every function that was actually
 *                generated with the MCJit through addFunctionToMCJit.
 * @in node     : vector aggregation state; its jitted_* slots are the
 *                addresses handed to addFunctionToMCJit.
 */
void VecHashAggCodeGen::HashAggCodeGen(VecAggState* node)
{
    /*
     * Codegeneration for hashagg:
     * Since the whole HashAggRunner::BuildAggTbl has been divided into three
     * part, we should do codegeneration separately.
     */
    Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj);
    GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;

    llvm::Function* jitted_vechashing = NULL;
    llvm::Function* jitted_vecsglhashing = NULL;
    llvm::Function* jitted_vecbatchagg = NULL;
    llvm::Function* jitted_SortAggMatchKey = NULL;

    /*
     * For aggregation, if economy is too small, which means number of distinct
     * values is very large, we meet cachemiss during batch aggregation, so
     * consider prefetch in this case.
     */
    VecAgg* vagg = (VecAgg*)node->ss.ps.plan;
    Plan* outerplan = outerPlan(vagg);
    bool use_prefetch = vagg->plan.plan_rows / ((Plan*)outerplan)->plan_rows > AGG_ECONOMY_RATION;

    /*
     * The template argument selects the single table (true) or the
     * segmented table (false) variant; it cannot be deduced from the call
     * arguments, so it is spelled out explicitly.
     */
    if (use_prefetch) {
        jitted_vechashing = AgghashingWithPrefetchCodeGenorSglTbl<false>(node);
        jitted_vecsglhashing = AgghashingWithPrefetchCodeGenorSglTbl<true>(node);
    } else {
        jitted_vechashing = AgghashingCodeGenorSglTbl<false>(node);
        jitted_vecsglhashing = AgghashingCodeGenorSglTbl<true>(node);
    }

    if (NULL != jitted_vechashing)
        llvmCodeGen->addFunctionToMCJit(jitted_vechashing, reinterpret_cast<void**>(&(node->jitted_hashing)));

    if (NULL != jitted_vecsglhashing)
        llvmCodeGen->addFunctionToMCJit(jitted_vecsglhashing, reinterpret_cast<void**>(&(node->jitted_sglhashing)));

    /* Codegeneration for BatchAggregation in buildAggTbl */
    jitted_vecbatchagg = dorado::VecHashAggCodeGen::BatchAggregationCodeGen(node, use_prefetch);
    if (NULL != jitted_vecbatchagg)
        llvmCodeGen->addFunctionToMCJit(jitted_vecbatchagg, reinterpret_cast<void**>(&(node->jitted_batchagg)));

    /* Codegeneration for sortagg */
    jitted_SortAggMatchKey = dorado::VecSortCodeGen::SortAggMatchKeyCodeGen(node);
    node->jitted_SortAggMatchKey = NULL;
    if (NULL != jitted_SortAggMatchKey)
        llvmCodeGen->addFunctionToMCJit(
            jitted_SortAggMatchKey, reinterpret_cast<void**>(&(node->jitted_SortAggMatchKey)));

    /* Codegeneration for targetlist of aggregation */
    llvm::Function* jitted_vectarget = NULL;
    jitted_vectarget = dorado::VecExprCodeGen::TargetListCodeGen(node->ss.ps.targetlist, (PlanState*)node);
    if (NULL != jitted_vectarget)
        llvmCodeGen->addFunctionToMCJit(
            jitted_vectarget, reinterpret_cast<void**>(&(node->ss.ps.ps_ProjInfo->jitted_vectarget)));
}

/*
 * SonicHashAggCodeGen
 * @Description : Entry point of code generation for sonic hash
 *                aggregation: generates the sonic batch aggregation
 *                function (without prefetch) and registers it with the
 *                MCJit when generation succeeded.
 * @in node     : vector aggregation state.
 */
void VecHashAggCodeGen::SonicHashAggCodeGen(VecAggState* node)
{
    Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj);
    GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;

    llvm::Function* jitted_sonicbatchagg = NULL;

    jitted_sonicbatchagg = SonicBatchAggregationCodeGen(node, false);
    if (NULL != jitted_sonicbatchagg)
        llvmCodeGen->addFunctionToMCJit(
            jitted_sonicbatchagg, reinterpret_cast<void**>(&(node->jitted_sonicbatchagg)));
}

/*
 * AgghashingCodeGenorSglTbl
 * @Description : Codegeneration for hashing a batch and matching keys in
 *                buildAggTbl. One loop over the batch computes the hash
 *                value of every tuple, stores it in m_hashVal / m_cacheLoc,
 *                walks the hash cell chain comparing hash value and keys,
 *                and either remembers the matching cell in m_Loc or calls
 *                the slot allocation wrapper.
 * @tparam isSglTbl : true generates "JittedSglTblAggHashing", which indexes
 *                    table 0 directly with cacheLoc; false generates
 *                    "JittedAggHashing", which splits cacheLoc into a
 *                    segment number and a position by m_hashseg_max.
 * @in node     : vector aggregation state.
 * @return      : the generated LLVM function, or NULL when
 *                AgghashingJittable(node) is false.
 * @Notes       : Reports an ERROR when a per-key hash or match function
 *                can not be generated.
 */
template <bool isSglTbl>
llvm::Function* VecHashAggCodeGen::AgghashingCodeGenorSglTbl(VecAggState* node)
{
    Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj);
    GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;

    /* If the condition can not be satisfied, no need to codegen */
    if (!AgghashingJittable(node))
        return NULL;

    /* Find and load the IR file from the installation directory */
    llvmCodeGen->loadIRFile();

    /* Extract plan information from node */
    int i = 0;
    VecAgg* vecagg = (VecAgg*)(node->ss.ps.plan);
    int numkeys = vecagg->numCols;
    AttrNumber* keyIdx = vecagg->grpColIdx;

    /* Get LLVM Context and builder */
    llvm::LLVMContext& context = llvmCodeGen->context();
    GsCodeGen::LlvmBuilder builder(context);

    llvm::Value* hAggRunner = NULL;
    llvm::Value* batch = NULL;
    llvm::Value* tmpval = NULL;
    llvm::Value* cmpval = NULL;
    llvm::Value* idx_next = NULL;
    llvm::Value* llvmargs[2];
    llvm::PHINode* phi_idx = NULL;
    llvm::Function* jitted_agghashing = NULL;

    /* Define data types and some llvm consts */
    DEFINE_CG_VOIDTYPE(voidType);
    DEFINE_CG_TYPE(int32Type, INT4OID);
    DEFINE_CG_TYPE(int64Type, INT8OID);
    DEFINE_CG_PTRTYPE(hashCellPtrType, "struct.hashCell");
    DEFINE_CG_PTRTYPE(vectorBatchPtrType, "class.VectorBatch");
    DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner");

    DEFINE_CGVAR_INT32(int32_m1, -1);
    DEFINE_CGVAR_INT32(int32_0, 0);
    DEFINE_CGVAR_INT32(int32_1, 1);
    DEFINE_CGVAR_INT32(int32_pos_hBOper_cols, pos_hBOper_cols);
    DEFINE_CGVAR_INT32(int32_pos_hBOper_cacheLoc, pos_hBOper_cacheLoc);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hashVal, pos_hAggR_hashVal);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hSegTbl, pos_hAggR_hSegTbl);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hsegmax, pos_hAggR_hsegmax); /* for AgghashingCodeGen */
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hashSize, pos_hAggR_hashSize);
    DEFINE_CGVAR_INT32(int32_pos_bAggR_keyIdxInCell, pos_bAggR_keyIdxInCell);
    DEFINE_CGVAR_INT32(int32_pos_bAggR_Loc, pos_bAggR_Loc);
    DEFINE_CGVAR_INT32(int32_pos_bAggR_keySimple, pos_bAggR_keySimple);
    DEFINE_CGVAR_INT32(int32_pos_hcell_mval, pos_hcell_mval);
    DEFINE_CGVAR_INT32(int32_pos_batch_marr, pos_batch_marr);
    DEFINE_CGVAR_INT32(int32_pos_scalvec_vals, pos_scalvec_vals);
    DEFINE_CGVAR_INT32(int32_pos_scalvec_flag, pos_scalvec_flag);
    DEFINE_CGVAR_INT64(Datum_0, 0);
    DEFINE_CGVAR_INT64(Datum_1, 1);

    /* llvm array values, used to represent the location of some element */
    llvm::Value* Vals[2] = {Datum_0, int32_0};
    llvm::Value* Vals3[3] = {Datum_0, int32_0, int32_0};
    llvm::Value* Vals4[4] = {Datum_0, int32_0, int32_0, int32_0};
    llvm::Value* Vals5[5] = {Datum_0, int32_0, int32_0, int32_0, int32_0};

    /* the generated function name depends on the table variant */
    const char* name = NULL;
    if (isSglTbl) {
        name = "JittedSglTblAggHashing";
    } else {
        name = "JittedAggHashing";
    }

    /* prototype: void name(HashAggRunner* hashAggRunner, VectorBatch* batch) */
    GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, name, voidType);
    fn_prototype.addArgument(GsCodeGen::NamedVariable("hashAggRunner", hashAggRunnerPtrType));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("batch", vectorBatchPtrType));
    jitted_agghashing = fn_prototype.generatePrototype(&builder, &llvmargs[0]);

    /* start the main codegen process for hashagg */
    hAggRunner = llvmargs[0];
    batch = llvmargs[1];

    /* get the number of rows of this batch : VectorBatch.m_rows */
    tmpval = builder.CreateInBoundsGEP(batch, Vals);
    llvm::Value* nValues = builder.CreateAlignedLoad(tmpval, 4, "m_rows");

    /* mask = hashAggRunner.m_hashSize - 1 */
    Vals[1] = int32_pos_hAggR_hashSize;
    tmpval = builder.CreateInBoundsGEP(hAggRunner, Vals);
    llvm::Value* maskval = builder.CreateAlignedLoad(tmpval, 8, "m_hashSize");
    maskval = builder.CreateSub(maskval, Datum_1, "mask");

    /* HashAggRunner.BaseAggRunner.hashBasedOperator.m_cols */
    Vals4[3] = int32_pos_hBOper_cols;
    llvm::Value* m_colsVal = builder.CreateInBoundsGEP(hAggRunner, Vals4);

    /* define basic block information for batch loop */
    llvm::BasicBlock* entry = &jitted_agghashing->getEntryBlock();
    DEFINE_BLOCK(for_body, jitted_agghashing);
    DEFINE_BLOCK(for_end, jitted_agghashing);
    DEFINE_BLOCK(for_inc, jitted_agghashing);

    /* define basic block information for key-value matching */
    DEFINE_BLOCK(while_body, jitted_agghashing);
    DEFINE_BLOCK(key_match, jitted_agghashing);
    DEFINE_BLOCK(hashval_eq, jitted_agghashing);
    DEFINE_BLOCK(next_cell, jitted_agghashing);
    DEFINE_BLOCK(alloc_hashslot, jitted_agghashing);

    /* define vector structures used for store batch info. in LLVM */
    llvm::Value** keyIdxInCell = (llvm::Value**)palloc(sizeof(llvm::Value*) * numkeys);
    llvm::Value** pVector = (llvm::Value**)palloc(sizeof(llvm::Value*) * numkeys);
    llvm::Value** pFlag = (llvm::Value**)palloc(sizeof(llvm::Value*) * numkeys);

    /* HashAggRunner.BaseAggRunner.m_keyIdxInCell */
    Vals3[0] = Datum_0;
    Vals3[1] = int32_0;
    Vals3[2] = int32_pos_bAggR_keyIdxInCell;
    llvm::Value* cellkeyIdx = builder.CreateInBoundsGEP(hAggRunner, Vals3);
    cellkeyIdx = builder.CreateAlignedLoad(cellkeyIdx, 4, "keyIdxInCellArr");
    for (i = 0; i < numkeys; i++) {
        /* load keyIdx in cell */
        tmpval = llvmCodeGen->getIntConstant(INT4OID, i);
        keyIdxInCell[i] = builder.CreateInBoundsGEP(cellkeyIdx, tmpval);
        keyIdxInCell[i] = builder.CreateAlignedLoad(keyIdxInCell[i], 4, "m_keyIdxInCell");
    }

    /* load m_arr from batch data */
    Vals[0] = Datum_0;
    Vals[1] = int32_pos_batch_marr;
    tmpval = builder.CreateInBoundsGEP(batch, Vals);
    llvm::Value* tmparr = builder.CreateAlignedLoad(tmpval, 8, "m_arr");
    for (i = 0; i < numkeys; i++) {
        /* load scalarvector from m_arr */
        AttrNumber key = keyIdx[i] - 1;
        Vals[0] = llvmCodeGen->getIntConstant(INT8OID, key);
        Vals[1] = int32_pos_scalvec_vals;
        llvm::Value* pVec = builder.CreateInBoundsGEP(tmparr, Vals);
        pVector[i] = builder.CreateAlignedLoad(pVec, 8, "pVector");

        /* load flag information from m_arr */
        Vals[1] = int32_pos_scalvec_flag;
        llvm::Value* Flag = builder.CreateInBoundsGEP(tmparr, Vals);
        pFlag[i] = builder.CreateAlignedLoad(Flag, 8, "pFlag");
    }

    /*
     * Begin to loop the whole batch to evaluation hashval and initialize
     * the hash cell by matching hashval and key
     */
    builder.SetInsertPoint(entry);
    tmpval = builder.CreateICmpSGT(nValues, int32_0);
    builder.CreateCondBr(tmpval, for_body, for_end);

    builder.SetInsertPoint(for_body);
    phi_idx = builder.CreatePHI(int64Type, 2);

    /* add one for every loop to check the index */
    idx_next = builder.CreateAdd(phi_idx, Datum_1);
    phi_idx->addIncoming(Datum_0, entry);
    phi_idx->addIncoming(idx_next, for_inc);

    /* given the initial hash value */
    llvm::Value* hash_res = int32_m1;
    bool rehash = false;

    /* evaluation the hash value for phi_idx-th tuple */
    for (i = 0; i < numkeys; i++) {
        /* every key after the first one is hashed with rehash set */
        if (i > 0)
            rehash = true;

        llvm::Function* func_hashbatch = HashBatchCodeGen(node, i, rehash);
        if (func_hashbatch == NULL) {
            ereport(ERROR,
                (errcode(ERRCODE_UNEXPECTED_NULL_VALUE),
                    errmodule(MOD_LLVM),
                    errmsg("Failed on generating HashBatchCodeGen!\n")));
        }

        /* load the phi_idx-th value and flag of the current key */
        llvm::Value* pval = builder.CreateInBoundsGEP(pVector[i], phi_idx);
        pval = builder.CreateAlignedLoad(pval, 8, "pval");
        llvm::Value* pflag = builder.CreateInBoundsGEP(pFlag[i], phi_idx);
        pflag = builder.CreateAlignedLoad(pflag, 1, "pflag");
        hash_res = builder.CreateCall(func_hashbatch, {pval, pflag, hash_res});
    }

    /* store the hash value (hash_res) to m_hashVal of hashAggRunner */
    /* hashAggRunner.m_hashVal[phi_idx] */
    Vals3[1] = int32_pos_hAggR_hashVal;
    Vals3[2] = phi_idx;
    llvm::Value* hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3);
    llvm::Value* hash_res64 = builder.CreateZExt(hash_res, int64Type);
    builder.CreateAlignedStore(hash_res64, hashValSlot, 8);

    /* corresponding to m_cacheLoc[i] = m_hashVal[i] & mask; (uint64) */
    llvm::Value* cacheLocVal = builder.CreateAnd(hash_res64, maskval, "cacheLoc");

    /* store cacheLocVal to hashAggRunner.BaseAggRunner.hashBasedOperator.m_cacheLoc[i] */
    Vals5[3] = int32_pos_hBOper_cacheLoc;
    Vals5[4] = phi_idx;
    llvm::Value* cacheLoc_i = builder.CreateInBoundsGEP(hAggRunner, Vals5);
    builder.CreateAlignedStore(cacheLocVal, cacheLoc_i, 8);

    llvm::Value* segmaxval = NULL;
    llvm::Value* nsegsval = NULL;
    llvm::Value* pos = NULL;
    if (!isSglTbl) {
        /* get m_hashseg_max from hashAggRunner to calculate nsegs and pos */
        Vals[0] = Datum_0;
        Vals[1] = int32_pos_hAggR_hsegmax;

        /* segmaxval, nsegsval and pos are for AgghashingCodeGen */
        segmaxval = builder.CreateInBoundsGEP(hAggRunner, Vals);
        segmaxval = builder.CreateAlignedLoad(segmaxval, 4, "segmax");
        segmaxval = builder.CreateSExt(segmaxval, int64Type);

        /*
         * nsegs = m_cacheLoc[i] / m_hashseg_max
         *
         * A plain udiv is required: the remainder of this very division is
         * used as pos below, so the division is not exact in general and an
         * 'exact' udiv would yield a poison value.
         */
        nsegsval = builder.CreateUDiv(cacheLocVal, segmaxval, "nsegs");
        nsegsval = builder.CreateTrunc(nsegsval, int32Type);

        /* pos = m_cacheLoc[i] % m_hashseg_max */
        pos = builder.CreateSRem(cacheLocVal, segmaxval, "pos");
    }

    /* get m_hashData from hashAggRunner */
    Vals[0] = Datum_0;
    Vals[1] = int32_pos_hAggR_hSegTbl;
    llvm::Value* hashData = builder.CreateInBoundsGEP(hAggRunner, Vals);
    hashData = builder.CreateAlignedLoad(hashData, 8, "m_hashData");

    /* single table variant always uses entry 0, otherwise entry nsegs */
    if (isSglTbl) {
        Vals[0] = int32_0;
    } else {
        Vals[0] = nsegsval;
    }
    Vals[1] = int32_1;
    llvm::Value* tbldata = builder.CreateInBoundsGEP(hashData, Vals);
    tbldata = builder.CreateAlignedLoad(tbldata, 8, "tbl_data");

    /* first cell of the chain: tbl_data[cacheLoc] or tbl_data[pos] */
    if (isSglTbl) {
        tmpval = builder.CreateInBoundsGEP(tbldata, cacheLocVal);
    } else {
        tmpval = builder.CreateInBoundsGEP(tbldata, pos);
    }
    llvm::Value* cellval = builder.CreateAlignedLoad(tmpval, 8, "cell");

    /* check if cell is NULL or not */
    tmpval = builder.CreatePtrToInt(cellval, int64Type);
    if (!isSglTbl) {
        cellval = builder.CreateIntToPtr(tmpval, hashCellPtrType);
    }
    cmpval = builder.CreateICmpEQ(tmpval, Datum_0);
    builder.CreateCondBr(cmpval, alloc_hashslot, while_body);

    /* while (cell!= NULL) { compare hash value and do match_key } */
    builder.SetInsertPoint(while_body);
    llvm::PHINode* phi_cell = builder.CreatePHI(hashCellPtrType, 2);

    /* get next cell : cell = cell->flag.m_next: hashCell.flag.m_next */
    builder.SetInsertPoint(next_cell);
    Vals3[0] = Datum_0;
    Vals3[1] = int32_0;
    Vals3[2] = int32_0;
    llvm::Value* nextcellval = builder.CreateInBoundsGEP(phi_cell, Vals3);
    nextcellval = builder.CreateAlignedLoad(nextcellval, 8, "nextcellval");
    tmpval = builder.CreatePtrToInt(nextcellval, int64Type);
    cmpval = builder.CreateICmpEQ(tmpval, Datum_0);
    builder.CreateCondBr(cmpval, alloc_hashslot, while_body);

    /* loop over the whole hash cell chain */
    builder.SetInsertPoint(while_body);
    phi_cell->addIncoming(nextcellval, next_cell);
    phi_cell->addIncoming(cellval, for_body);

    /* get hash value from current cell : cell->m_val[m_cols].val */
    tmpval = builder.CreateAlignedLoad(m_colsVal, 4, "m_cols");
    tmpval = builder.CreateZExt(tmpval, int64Type);
    Vals4[0] = Datum_0;
    Vals4[1] = int32_pos_hcell_mval;
    Vals4[2] = tmpval;
    Vals4[3] = int32_0;
    tmpval = builder.CreateInBoundsGEP(phi_cell, Vals4);
    tmpval = builder.CreateAlignedLoad(tmpval, 8, "hashval_cell");
    llvm::Value* cmp_hashval = builder.CreateICmpEQ(hash_res64, tmpval);
    builder.CreateCondBr(cmp_hashval, hashval_eq, next_cell);

    /*
     * When hash val is equal, we only consider the result of matchkey.
     * Loop over all the keys, once the key is not matched, get the
     * next cell. If all the keys have been compared, find the next cell
     * or keymatched according to the result. The following code is the
     * codegeneration of the following code:
     * if (true && match_key<simple>()(batch, i, cell))
     * { ...; break; }
     * cell = cell->flag.m_next
     */
    builder.SetInsertPoint(hashval_eq);
    for (i = 0; i < numkeys; i++) {
        llvm::Function* func_matchonekey = MatchOneKeyCodeGen(node, i);
        if (NULL == func_matchonekey) {
            ereport(ERROR,
                (errcode(ERRCODE_UNEXPECTED_NULL_VALUE),
                    errmodule(MOD_LLVM),
                    errmsg("Failed on generating MatchOneKey Function!\n")));
        }

        /* load the phi_idx-th value and flag of the current key */
        llvm::Value* pval = builder.CreateInBoundsGEP(pVector[i], phi_idx);
        pval = builder.CreateAlignedLoad(pval, 8, "pval");
        llvm::Value* pflag = builder.CreateInBoundsGEP(pFlag[i], phi_idx);
        pflag = builder.CreateAlignedLoad(pflag, 1, "pflag");
        llvm::Value* res = builder.CreateCall(func_matchonekey, {pval, pflag, phi_cell, keyIdxInCell[i]});

        /* a zero result means this key does not match: try the next cell */
        cmpval = builder.CreateICmpEQ(res, Datum_0);
        if (i == numkeys - 1)
            builder.CreateCondBr(cmpval, next_cell, key_match);
        else {
            /* more keys to compare: continue in a fresh block */
            DEFINE_BLOCK(next_bb, jitted_agghashing);
            builder.CreateCondBr(cmpval, next_cell, next_bb);
            builder.SetInsertPoint(next_bb);
        }
    }

    /*
     * when both hash value and all keys are matched, remember this
     * hash cell and go to next tuple.
     */
    builder.SetInsertPoint(key_match);

    /* get hashAggRunner.BaseAggRunner.m_Loc[phi_idx] */
    Vals4[0] = Datum_0;
    Vals4[1] = int32_0;
    Vals4[2] = int32_pos_bAggR_Loc;
    Vals4[3] = phi_idx;
    tmpval = builder.CreateInBoundsGEP(hAggRunner, Vals4);
    builder.CreateAlignedStore(phi_cell, tmpval, 8);
    builder.CreateBr(for_inc);

    /* if (foundMatch == false){ allocate hash slot and initialize it } */
    builder.SetInsertPoint(alloc_hashslot);

    /* HashAggRunner.BaseAggRunner.m_keySimple */
    Vals3[0] = Datum_0;
    Vals3[1] = int32_0;
    Vals3[2] = int32_pos_bAggR_keySimple;
    llvm::Value* simple_key = builder.CreateInBoundsGEP(hAggRunner, Vals3);
    simple_key = builder.CreateAlignedLoad(simple_key, 4, "key_simple");
    if (isSglTbl) {
        WarpSglTblAllocHashSlotCodeGen(&builder, hAggRunner, batch, phi_idx, simple_key);
    } else {
        WarpAllocHashSlotCodeGen(&builder, hAggRunner, batch, phi_idx, simple_key);
    }
    builder.CreateBr(for_inc);

    /* leave the loop once idx_next reaches the number of rows */
    builder.SetInsertPoint(for_inc);
    tmpval = builder.CreateTrunc(idx_next, int32Type);
    tmpval = builder.CreateICmpEQ(tmpval, nValues);
    builder.CreateCondBr(tmpval, for_end, for_body);

    /* return nothing after hashing all the tuples */
    builder.SetInsertPoint(for_end);
    builder.CreateRetVoid();

    llvmCodeGen->FinalizeFunction(jitted_agghashing, vecagg->plan.plan_node_id);

    return jitted_agghashing;
}

/*
 * AgghashingWithPrefetchCodeGen
 * @Description	: Codegeneration for hashing batch and match key in
 *				  buildAggTbl. To reduce cache miss, we need to prefetch
 *				  the hash cell. Different from the original function,
 *				  we use two loops to handle the hashing and match key
 *				  separately, since cache miss mostly happens during
 *				  matching key.
 *
 * AggSglTblhashingWithPrefetchCodeGen
 * @Description	: Codegeneration for hashing batch and match key in
 *				  buildAggTbl. To reduce cache miss, we need to prefetch
 *				  the hash cell. Different from the original function,
 *				  we use two loops to handle the hashing and match key
 *				  separately, since cache miss mostly happens during
 *				  matching key.
*/ template llvm::Function* VecHashAggCodeGen::AgghashingWithPrefetchCodeGenorSglTbl(VecAggState* node) { Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj); GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; /* If the condition can not be satisfied, no need to codegen */ if (!AgghashingJittable(node)) return NULL; /* Find and load the IR file from the installaion directory */ llvmCodeGen->loadIRFile(); /* Extract plan information from node */ int i = 0; VecAgg* vecagg = (VecAgg*)(node->ss.ps.plan); int numkeys = vecagg->numCols; AttrNumber* keyIdx = vecagg->grpColIdx; /* Get LLVM Context and builder */ llvm::LLVMContext& context = llvmCodeGen->context(); GsCodeGen::LlvmBuilder builder(context); llvm::Value* hAggRunner = NULL; llvm::Value* batch = NULL; llvm::Value* tmpval = NULL; llvm::Value* cmpval = NULL; llvm::Value* idx_next = NULL; llvm::Value* llvmargs[2]; llvm::PHINode* phi_idx = NULL; llvm::Function* jitted_agghashing = NULL; /* Define data types and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_TYPE(int32Type, INT4OID); DEFINE_CG_TYPE(int64Type, INT8OID); DEFINE_CG_PTRTYPE(hashCellPtrType, "struct.hashCell"); DEFINE_CG_PTRTYPE(vectorBatchPtrType, "class.VectorBatch"); DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner"); DEFINE_CGVAR_INT32(int32_m1, -1); DEFINE_CGVAR_INT32(int32_0, 0); DEFINE_CGVAR_INT32(int32_1, 1); DEFINE_CGVAR_INT32(int32_pos_hBOper_cols, pos_hBOper_cols); DEFINE_CGVAR_INT32(int32_pos_hBOper_cacheLoc, pos_hBOper_cacheLoc); DEFINE_CGVAR_INT32(int32_pos_hAggR_hashVal, pos_hAggR_hashVal); DEFINE_CGVAR_INT32(int32_pos_hAggR_hSegTbl, pos_hAggR_hSegTbl); DEFINE_CGVAR_INT32(int32_pos_hAggR_hsegmax, pos_hAggR_hsegmax); /* just for AgghashingWithPrefetchCodeGen */ DEFINE_CGVAR_INT32(int32_pos_hAggR_hashSize, pos_hAggR_hashSize); DEFINE_CGVAR_INT32(int32_pos_bAggR_keyIdxInCell, pos_bAggR_keyIdxInCell); DEFINE_CGVAR_INT32(int32_pos_bAggR_Loc, pos_bAggR_Loc); 
DEFINE_CGVAR_INT32(int32_pos_bAggR_keySimple, pos_bAggR_keySimple); DEFINE_CGVAR_INT32(int32_pos_hcell_mval, pos_hcell_mval); DEFINE_CGVAR_INT32(int32_pos_batch_marr, pos_batch_marr); DEFINE_CGVAR_INT32(int32_pos_scalvec_vals, pos_scalvec_vals); DEFINE_CGVAR_INT32(int32_pos_scalvec_flag, pos_scalvec_flag); DEFINE_CGVAR_INT64(Datum_0, 0); DEFINE_CGVAR_INT64(Datum_1, 1); /* llvm array values, used to represent the location of some element */ llvm::Value* Vals[2] = {Datum_0, int32_0}; llvm::Value* Vals3[3] = {Datum_0, int32_0, int32_0}; llvm::Value* Vals4[4] = {Datum_0, int32_0, int32_0, int32_0}; llvm::Value* Vals5[5] = {Datum_0, int32_0, int32_0, int32_0, int32_0}; const char* name = NULL; if (isSglTbl) { name = "JittedSglTblAggHashingWithPreFetch"; } else { name = "JittedAggHashingWithPreFetch"; } GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, name, voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("hashAggRunner", hashAggRunnerPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("batch", vectorBatchPtrType)); jitted_agghashing = fn_prototype.generatePrototype(&builder, &llvmargs[0]); /* start the main codegen process for hashagg */ hAggRunner = llvmargs[0]; batch = llvmargs[1]; /* get the number of rows of this batch : VectorBatch.m_rows */ tmpval = builder.CreateInBoundsGEP(batch, Vals); llvm::Value* nValues = builder.CreateAlignedLoad(tmpval, 4, "m_rows"); /* mask = hashAggRunner.m_hashSize - 1 */ Vals[1] = int32_pos_hAggR_hashSize; tmpval = builder.CreateInBoundsGEP(hAggRunner, Vals); llvm::Value* maskval = builder.CreateAlignedLoad(tmpval, 8, "m_hashSize"); maskval = builder.CreateSub(maskval, Datum_1, "mask"); /* HashAggRunner.BaseAggRunner.hashBasedOperator.m_cols */ Vals4[3] = int32_pos_hBOper_cols; llvm::Value* m_colsVal = builder.CreateInBoundsGEP(hAggRunner, Vals4); /* define basic block information for batch loop */ llvm::BasicBlock* entry = &jitted_agghashing->getEntryBlock(); DEFINE_BLOCK(hashing_for_body, jitted_agghashing); 
DEFINE_BLOCK(hashing_for_end, jitted_agghashing); DEFINE_BLOCK(hashing_for_inc, jitted_agghashing); DEFINE_BLOCK(for_body, jitted_agghashing); DEFINE_BLOCK(for_end, jitted_agghashing); DEFINE_BLOCK(for_inc, jitted_agghashing); /* define basic block information for key-value matching */ DEFINE_BLOCK(while_body, jitted_agghashing); DEFINE_BLOCK(key_match, jitted_agghashing); DEFINE_BLOCK(hashval_eq, jitted_agghashing); DEFINE_BLOCK(next_cell, jitted_agghashing); DEFINE_BLOCK(alloc_hashslot, jitted_agghashing); /* define vector structures used for store batch info. in LLVM */ llvm::Value** keyIdxInCell = (llvm::Value**)palloc(sizeof(llvm::Value*) * numkeys); llvm::Value** pVector = (llvm::Value**)palloc(sizeof(llvm::Value*) * numkeys); llvm::Value** pFlag = (llvm::Value**)palloc(sizeof(llvm::Value*) * numkeys); /* HashAggRunner.BaseAggRunner.m_keyIdxInCell */ Vals3[0] = Datum_0; Vals3[1] = int32_0; Vals3[2] = int32_pos_bAggR_keyIdxInCell; llvm::Value* cellkeyIdx = builder.CreateInBoundsGEP(hAggRunner, Vals3); cellkeyIdx = builder.CreateAlignedLoad(cellkeyIdx, 4, "keyIdxInCellArr"); for (i = 0; i < numkeys; i++) { /* load keyIdx in cell */ tmpval = llvmCodeGen->getIntConstant(INT4OID, i); keyIdxInCell[i] = builder.CreateInBoundsGEP(cellkeyIdx, tmpval); keyIdxInCell[i] = builder.CreateAlignedLoad(keyIdxInCell[i], 4, "m_keyIdxInCell"); } /* load m_arr from batch data */ Vals[0] = Datum_0; Vals[1] = int32_pos_batch_marr; tmpval = builder.CreateInBoundsGEP(batch, Vals); llvm::Value* tmparr = builder.CreateAlignedLoad(tmpval, 8, "m_arr"); for (i = 0; i < numkeys; i++) { /* load scalarvector from m_arr */ AttrNumber key = keyIdx[i] - 1; Vals[0] = llvmCodeGen->getIntConstant(INT8OID, key); Vals[1] = int32_pos_scalvec_vals; llvm::Value* pVec = builder.CreateInBoundsGEP(tmparr, Vals); pVector[i] = builder.CreateAlignedLoad(pVec, 8, "pVector"); /* load flag information from m_arr */ Vals[1] = int32_pos_scalvec_flag; llvm::Value* Flag = builder.CreateInBoundsGEP(tmparr, Vals); 
pFlag[i] = builder.CreateAlignedLoad(Flag, 8, "pFlag"); } /* * Begin to loop the whole batch to evaluation hashval */ builder.SetInsertPoint(entry); tmpval = builder.CreateICmpSGT(nValues, int32_0); builder.CreateCondBr(tmpval, hashing_for_body, for_end); builder.SetInsertPoint(hashing_for_body); phi_idx = builder.CreatePHI(int64Type, 2); /* add one for every hashing loop to check the index */ idx_next = builder.CreateAdd(phi_idx, Datum_1); phi_idx->addIncoming(Datum_0, entry); phi_idx->addIncoming(idx_next, hashing_for_inc); /* given the initial hash value */ llvm::Value* hash_res = int32_m1; bool rehash = false; /* evaluation the hash value for phi_idx-th tuple */ for (i = 0; i < numkeys; i++) { if (i > 0) rehash = true; llvm::Function* func_hashbatch = HashBatchCodeGen(node, i, rehash); if (func_hashbatch == NULL) { ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), errmodule(MOD_LLVM), errmsg("Failed on generating HashBatchCodeGen!\n"))); } /* load the phi_idx-th value and flag of the current key */ llvm::Value* pval = builder.CreateInBoundsGEP(pVector[i], phi_idx); pval = builder.CreateAlignedLoad(pval, 8, "pval"); llvm::Value* pflag = builder.CreateInBoundsGEP(pFlag[i], phi_idx); pflag = builder.CreateAlignedLoad(pflag, 1, "pflag"); hash_res = builder.CreateCall(func_hashbatch, {pval, pflag, hash_res}); } /* store the hash value (hash_res) to m_hashVal of hashAggRunner */ /* hashAggRunner.m_hashVal[phi_idx] */ Vals3[1] = int32_pos_hAggR_hashVal; Vals3[2] = phi_idx; llvm::Value* hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3); llvm::Value* hash_res64 = builder.CreateZExt(hash_res, int64Type); builder.CreateAlignedStore(hash_res64, hashValSlot, 8); builder.CreateBr(hashing_for_inc); builder.SetInsertPoint(hashing_for_inc); tmpval = builder.CreateTrunc(idx_next, int32Type); tmpval = builder.CreateICmpEQ(tmpval, nValues); builder.CreateCondBr(tmpval, hashing_for_end, hashing_for_body); builder.SetInsertPoint(hashing_for_end); 
builder.CreateBr(for_body); /* * Ending the hashing loop and starting the loop for match_key. * Initializing hash cell according to the match key result. */ builder.SetInsertPoint(for_body); phi_idx = builder.CreatePHI(int64Type, 2); /* add one for every match key loop to check the index */ idx_next = builder.CreateAdd(phi_idx, Datum_1); phi_idx->addIncoming(Datum_0, hashing_for_end); phi_idx->addIncoming(idx_next, for_inc); /* Prefetch hashData[m_cacheLoc[i+2]] and &(hashData[m_cacheLoc[i+4]]) */ if (isSglTbl) { llvm::Function* func_sgltblprefetch = llvmCodeGen->module()->getFunction("prefetchAggSglTblHashing"); if (func_sgltblprefetch == NULL) { func_sgltblprefetch = prefetchAggSglTblHashingCodeGen(); } llvm::Value* nrows = builder.CreateZExt(nValues, int64Type); builder.CreateCall(func_sgltblprefetch, {hAggRunner, phi_idx, nrows}); } else { llvm::Function* func_prefetch = llvmCodeGen->module()->getFunction("prefetchAggHashing"); if (func_prefetch == NULL) { func_prefetch = prefetchAggHashingCodeGen(); } llvm::Value* nrows = builder.CreateZExt(nValues, int64Type); builder.CreateCall(func_prefetch, {hAggRunner, phi_idx, nrows}); } /* load the hash value (hash_res) from m_hashVal of hashAggRunner */ /* hashAggRunner.m_hashVal[phi_idx] */ Vals3[1] = int32_pos_hAggR_hashVal; Vals3[2] = phi_idx; hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3); hash_res64 = builder.CreateAlignedLoad(hashValSlot, 8, "m_hashVal"); hash_res = builder.CreateTrunc(hash_res64, int32Type); /* corresponding to m_cacheLoc[i] = m_hashVal[i] & mask; (uint64) */ llvm::Value* cacheLocVal = builder.CreateAnd(hash_res64, maskval, "cacheLoc"); /* store cacheLocVal to hashAggRunner.BaseAggRunner.hashBasedOperator.m_cacheLoc[i] */ Vals5[3] = int32_pos_hBOper_cacheLoc; Vals5[4] = phi_idx; llvm::Value* cacheLoc_i = builder.CreateInBoundsGEP(hAggRunner, Vals5); builder.CreateAlignedStore(cacheLocVal, cacheLoc_i, 8); /* segmaxval, nsegsval and pos are for AgghashingCodeGen */ llvm::Value* 
segmaxval = NULL; llvm::Value* nsegsval = NULL; llvm::Value* pos = NULL; /* get m_hashseg_max from hashAggRunner to calculate nsegs and pos */ if (!isSglTbl) { Vals[0] = Datum_0; Vals[1] = int32_pos_hAggR_hsegmax; segmaxval = builder.CreateInBoundsGEP(hAggRunner, Vals); segmaxval = builder.CreateAlignedLoad(segmaxval, 4, "segmax"); segmaxval = builder.CreateSExt(segmaxval, int64Type); /* nsegs = m_cacheLoc[i] / m_hashseg_max */ nsegsval = builder.CreateExactUDiv(cacheLocVal, segmaxval, "nsegs"); nsegsval = builder.CreateTrunc(nsegsval, int32Type); /* pos = m_cacheLoc[i] % m_hashseg_max */ pos = builder.CreateSRem(cacheLocVal, segmaxval, "pos"); } /* get m_hashData from hashAggRunner */ Vals[0] = Datum_0; Vals[1] = int32_pos_hAggR_hSegTbl; llvm::Value* hashData = builder.CreateInBoundsGEP(hAggRunner, Vals); hashData = builder.CreateAlignedLoad(hashData, 8, "m_hashData"); if (isSglTbl) { Vals[0] = int32_0; } else { Vals[0] = nsegsval; } Vals[1] = int32_1; llvm::Value* tbldata = builder.CreateInBoundsGEP(hashData, Vals); tbldata = builder.CreateAlignedLoad(tbldata, 8, "tbl_data"); if (isSglTbl) { tmpval = builder.CreateInBoundsGEP(tbldata, cacheLocVal); } else { tmpval = builder.CreateInBoundsGEP(tbldata, pos); } llvm::Value* cellval = builder.CreateAlignedLoad(tmpval, 8, "cell"); /* check if cell is NULL or not */ tmpval = builder.CreatePtrToInt(cellval, int64Type); cmpval = builder.CreateICmpEQ(tmpval, Datum_0); builder.CreateCondBr(cmpval, alloc_hashslot, while_body); /* while (cell!= NULL) { compare hash value and do match_key } */ builder.SetInsertPoint(while_body); llvm::PHINode* phi_cell = builder.CreatePHI(hashCellPtrType, 2); /* get next cell : cell = cell->flag.m_next: hashCell.flag.m_next */ builder.SetInsertPoint(next_cell); Vals3[0] = Datum_0; Vals3[1] = int32_0; Vals3[2] = int32_0; llvm::Value* nextcellval = builder.CreateInBoundsGEP(phi_cell, Vals3); nextcellval = builder.CreateAlignedLoad(nextcellval, 8, "nextcellval"); tmpval = 
builder.CreatePtrToInt(nextcellval, int64Type); cmpval = builder.CreateICmpEQ(tmpval, Datum_0); builder.CreateCondBr(cmpval, alloc_hashslot, while_body); /* loop over the whole hash cell chain */ builder.SetInsertPoint(while_body); phi_cell->addIncoming(nextcellval, next_cell); phi_cell->addIncoming(cellval, for_body); /* get hash value from current cell : cell->m_val[m_cols].val */ tmpval = builder.CreateAlignedLoad(m_colsVal, 4, "m_cols"); tmpval = builder.CreateZExt(tmpval, int64Type); Vals4[0] = Datum_0; Vals4[1] = int32_pos_hcell_mval; Vals4[2] = tmpval; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(phi_cell, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 8, "hashval_cell"); llvm::Value* cmp_hashval = builder.CreateICmpEQ(hash_res64, tmpval); builder.CreateCondBr(cmp_hashval, hashval_eq, next_cell); /* * When hash val is equal, we only consider the result of matchkey. * Loop over all the keys, once the key is not matched, get the * next cell. If all the keys have been compared, find the next cell * or keymatched according to the result. 
The following code is the * codegeneration of the following code: * if (true && match_key()(batch, i, cell)) * { ...; break; } * cell = cell->flag.m_next */ builder.SetInsertPoint(hashval_eq); for (i = 0; i < numkeys; i++) { llvm::Function* func_matchonekey = MatchOneKeyCodeGen(node, i); if (NULL == func_matchonekey) { ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), errmodule(MOD_LLVM), errmsg("Failed on generating MatchOneKey Function!\n"))); } /* load the phi_idx-th value and flag of the current key */ llvm::Value* pval = builder.CreateInBoundsGEP(pVector[i], phi_idx); pval = builder.CreateAlignedLoad(pval, 8, "pval"); llvm::Value* pflag = builder.CreateInBoundsGEP(pFlag[i], phi_idx); pflag = builder.CreateAlignedLoad(pflag, 1, "pflag"); llvm::Value* res = builder.CreateCall(func_matchonekey, {pval, pflag, phi_cell, keyIdxInCell[i]}); cmpval = builder.CreateICmpEQ(res, Datum_0); if (i == numkeys - 1) builder.CreateCondBr(cmpval, next_cell, key_match); else { DEFINE_BLOCK(next_bb, jitted_agghashing); builder.CreateCondBr(cmpval, next_cell, next_bb); builder.SetInsertPoint(next_bb); } } /* * when both hash value and all keys are matched, remember this * hash cell and go to next tuple. 
*/ builder.SetInsertPoint(key_match); /* get hashAggRunner.BaseAggRunner.m_Loc[phi_idx] */ Vals4[0] = Datum_0; Vals4[1] = int32_0; Vals4[2] = int32_pos_bAggR_Loc; Vals4[3] = phi_idx; tmpval = builder.CreateInBoundsGEP(hAggRunner, Vals4); builder.CreateAlignedStore(phi_cell, tmpval, 8); builder.CreateBr(for_inc); /* if (foundMatch == false){ allocate hash slot and initilize it } */ builder.SetInsertPoint(alloc_hashslot); /* HashAggRunner.BaseAggRunner.m_keySimple */ Vals3[0] = Datum_0; Vals3[1] = int32_0; Vals3[2] = int32_pos_bAggR_keySimple; llvm::Value* simple_key = builder.CreateInBoundsGEP(hAggRunner, Vals3); simple_key = builder.CreateAlignedLoad(simple_key, 4, "key_simple"); WarpAllocHashSlotCodeGen(&builder, hAggRunner, batch, phi_idx, simple_key); builder.CreateBr(for_inc); builder.SetInsertPoint(for_inc); tmpval = builder.CreateTrunc(idx_next, int32Type); tmpval = builder.CreateICmpEQ(tmpval, nValues); builder.CreateCondBr(tmpval, for_end, for_body); /* return nothing after hashing all the tuples */ builder.SetInsertPoint(for_end); builder.CreateRetVoid(); llvmCodeGen->FinalizeFunction(jitted_agghashing, vecagg->plan.plan_node_id); return jitted_agghashing; } llvm::Function* VecHashAggCodeGen::BatchAggregationCodeGen(VecAggState* node, bool use_prefetch) { /* First get the basic information of VecAggState */ int numaggs = node->numaggs; AggStatePerAgg peragg = node->peragg; Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj); GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; if (!BatchAggJittable(node, false)) return NULL; /* Find and load the IR file from the installaion directory */ llvmCodeGen->loadIRFile(); /* Get LLVM Context and builder */ llvm::LLVMContext& context = llvmCodeGen->context(); GsCodeGen::LlvmBuilder builder(context); llvm::Module* mod = llvmCodeGen->module(); int i; int exprscale = 0; bool fast_aggref = false; ExprState* estate = NULL; llvm::Value* nValues = NULL; llvm::Value* tmpval = NULL; 
llvm::Value* idx_next = NULL; llvm::Value* cell = NULL; llvm::Value* result = NULL; llvm::Value* expres = NULL; llvm::Value* llvmargs[4]; Aggref* aggref = NULL; llvm::Function* jitted_batchagg = NULL; /* Define data types and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_TYPE(int8Type, CHAROID); DEFINE_CG_TYPE(int16Type, INT2OID); DEFINE_CG_TYPE(int32Type, INT4OID); DEFINE_CG_TYPE(int64Type, INT8OID); DEFINE_CG_PTRTYPE(int32PtrType, INT4OID); DEFINE_CG_PTRTYPE(int64PtrType, INT8OID); DEFINE_CG_PTRTYPE(hashCellPtrType, "struct.hashCell"); DEFINE_CG_PTRTYPE(ExprContextPtrType, "struct.ExprContext"); DEFINE_CG_PTRTYPE(vectorBatchPtrType, "class.VectorBatch"); DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner"); DEFINE_CG_PTRTYPE(numericPtrType, "struct.NumericData"); llvm::Type* hashCellPtrPtrType = llvmCodeGen->getPtrType(hashCellPtrType); /* create LLVM value with {uint16, int64} format type */ llvm::Type* Elements[] = {int16Type, int64Type}; llvm::Type* SiNumeric64Type = llvm::StructType::create(context, Elements, "SiNumeric64"); DEFINE_CGVAR_INT8(int8_0, 0); DEFINE_CGVAR_INT8(int8_1, 1); DEFINE_CGVAR_INT16(val_mask, NUMERIC_BI_MASK); DEFINE_CGVAR_INT16(val_binum64, NUMERIC_64); DEFINE_CGVAR_INT32(int32_0, 0); DEFINE_CGVAR_INT32(int32_1, 1); DEFINE_CGVAR_INT64(int64_0, 0); DEFINE_CGVAR_INT64(int64_1, 1); DEFINE_CGVAR_INT64(int64_6, 6); DEFINE_CGVAR_INT32(int32_pos_batch_marr, pos_batch_marr); DEFINE_CGVAR_INT32(int32_pos_scalvec_vals, pos_scalvec_vals); DEFINE_CGVAR_INT32(int32_pos_scalvec_flag, pos_scalvec_flag); DEFINE_CGVAR_INT32(int32_pos_ecxt_pertuple, pos_ecxt_pertuple); DEFINE_CGVAR_INT32(int32_pos_ecxt_outerbatch, pos_ecxt_outerbatch); DEFINE_CGVAR_INT32(int32_pos_hBOper_hcxt, pos_hBOper_hcxt); DEFINE_CGVAR_INT32(int32_pos_bAggR_econtext, pos_bAggR_econtext); DEFINE_CGVAR_INT32(int32_pos_hcell_mval, pos_hcell_mval); /* llvm array values, used to represent the location of some element */ llvm::Value* Vals[2] = {int64_0, int32_0}; 
llvm::Value* Vals3[3] = {int64_0, int32_0, int32_0}; llvm::Value* Vals4[4] = {int64_0, int32_0, int32_0, int32_0}; llvm::Value** aggIdxList = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::Value** batch_vals = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::Value** batch_flag = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::BasicBlock** agg_bb = (llvm::BasicBlock**)palloc(sizeof(llvm::BasicBlock*) * numaggs); llvm::BasicBlock** flag_then = (llvm::BasicBlock**)palloc(sizeof(llvm::BasicBlock*) * numaggs); llvm::BasicBlock** flag_else = (llvm::BasicBlock**)palloc(sizeof(llvm::BasicBlock*) * numaggs); GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "JittedFastBatchAgg", voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("haRuner", hashAggRunnerPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("Loc", hashCellPtrPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("batch", vectorBatchPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("aggIdx", int32PtrType)); jitted_batchagg = fn_prototype.generatePrototype(&builder, &llvmargs[0]); llvm::Value* hAggRunner = llvmargs[0]; llvm::Value* Loc = llvmargs[1]; llvm::Value* batch = llvmargs[2]; llvm::Value* aggIdx = llvmargs[3]; /* parameter used to mark if this tuple is NULL or not */ llvm::Value* isNull = builder.CreateAlloca(int8Type); /* HashAggRunner.BaseAggRunner.hashBasedOperator.m_hashContext */ Vals4[3] = int32_pos_hBOper_hcxt; llvm::Value* hcxt = builder.CreateInBoundsGEP(hAggRunner, Vals4); hcxt = builder.CreateAlignedLoad(hcxt, 8, "hashContext"); /* get the nrows of the batch */ tmpval = builder.CreateInBoundsGEP(batch, Vals); nValues = builder.CreateAlignedLoad(tmpval, 4, "m_rows"); /* get vectorBatch.m_arr of the batch */ Vals[0] = int64_0; Vals[1] = int32_pos_batch_marr; llvm::Value* argVector = builder.CreateInBoundsGEP(batch, Vals); argVector = builder.CreateAlignedLoad(argVector, 8, "m_arr"); /* pre-load all the expression context */ 
llvm::Value** econtext = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); for (i = 0; i < numaggs; i++) { econtext[i] = NULL; AggStatePerAgg peraggstate = &node->peragg[numaggs - i - 1]; aggref = (Aggref*)(peraggstate->aggref); if (peraggstate->evalproj != NULL && aggref->aggfnoid != COUNTOID) { ExprContext* exprcontext = peragg[numaggs - 1 - i].evalproj->pi_exprContext; econtext[i] = llvmCodeGen->CastPtrToLlvmPtr(ExprContextPtrType, exprcontext); } } /* define the basic block needed in the main process */ llvm::BasicBlock* entry = &jitted_batchagg->getEntryBlock(); DEFINE_BLOCK(for_body, jitted_batchagg); DEFINE_BLOCK(for_inc, jitted_batchagg); DEFINE_BLOCK(for_end, jitted_batchagg); /* get all addIdx of agg operators */ for (i = 0; i < numaggs; i++) { llvm::Value* tmpidx = llvmCodeGen->getIntConstant(INT4OID, i); tmpval = builder.CreateInBoundsGEP(aggIdx, tmpidx); tmpval = builder.CreateAlignedLoad(tmpval, 4, "aggIdx"); aggIdxList[i] = builder.CreateSExt(tmpval, int64Type); agg_bb[i] = llvm::BasicBlock::Create(context, "agg_bb", jitted_batchagg); flag_then[i] = llvm::BasicBlock::Create(context, "flag_then", jitted_batchagg); flag_else[i] = llvm::BasicBlock::Create(context, "flag_else", jitted_batchagg); } /* * Start the main process for batchaggregation, which has the following * pedudo code: * for (j = 0; j < nrows; j++){ * for (i = 0; i < m_aggNum; i++){ * peraggstate = peragg[numaggs - 1 - i]; * pbatch = ExecVecProject (peraggstate->evalproj) * AggregationOnScalar(aggInfo[i], &pbatch->m_arr[0], aggidx[i], m_Loc) * } * } */ builder.SetInsertPoint(entry); /* * First get the ecxt_per_tuple_memory, since we need to switch to this * memory context. 
*/ /* HashAggRunner.BaseAggRunner.m_econtext */ Vals3[2] = int32_pos_bAggR_econtext; llvm::Value* mecontext = builder.CreateInBoundsGEP(hAggRunner, Vals3); mecontext = builder.CreateAlignedLoad(mecontext, 8, "m_econtext"); Vals[1] = int32_pos_ecxt_pertuple; llvm::Value* agg_expr_context = builder.CreateInBoundsGEP(mecontext, Vals); agg_expr_context = builder.CreateAlignedLoad(agg_expr_context, 8, "agg_per_tuple_memory"); llvm::Value* agg_oldcontext = VecExprCodeGen::MemCxtSwitToCodeGen(&builder, agg_expr_context); /* * Load value and flag from batch before the batch loop when we have * simple vars in transition level. */ for (i = 0; i < numaggs; i++) { int numSimpleVars = 0; AggStatePerAgg peraggstate = &node->peragg[numaggs - i - 1]; aggref = (Aggref*)(peraggstate->aggref); ProjectionInfo* projInfo = (ProjectionInfo*)(peraggstate->evalproj); if (aggref->aggstage == 0 && aggref->aggfnoid != COUNTOID) { numSimpleVars = projInfo->pi_numSimpleVars; if (numSimpleVars > 0) { int* varNumbers = projInfo->pi_varNumbers; int varNumber = varNumbers[0] - 1; /* m_arr[varNumber].m_vals */ Vals[0] = llvmCodeGen->getIntConstant(INT8OID, varNumber); Vals[1] = int32_pos_scalvec_vals; tmpval = builder.CreateInBoundsGEP(argVector, Vals); tmpval = builder.CreateAlignedLoad(tmpval, 8, "m_vals"); batch_vals[i] = tmpval; /* m_arr[varNumber].m_flag */ Vals[1] = int32_pos_scalvec_flag; llvm::Value* argFlag = builder.CreateInBoundsGEP(argVector, Vals); argFlag = builder.CreateAlignedLoad(argFlag, 1, "m_flag"); batch_flag[i] = argFlag; } } } tmpval = builder.CreateICmpSGT(nValues, int32_0); builder.CreateCondBr(tmpval, for_body, for_end); builder.SetInsertPoint(for_body); llvm::PHINode* phi_idx = builder.CreatePHI(int64Type, 2); /* after each loop, index plus one */ idx_next = builder.CreateAdd(phi_idx, int64_1); phi_idx->addIncoming(int64_0, entry); phi_idx->addIncoming(idx_next, for_inc); /* define prefetch function to prefetch loc[i+2] to avoid cache miss */ if (use_prefetch) { 
llvm::Function* func_prefetch = llvmCodeGen->module()->getFunction("prefetchBatchAggregation"); if (NULL == func_prefetch) { func_prefetch = prefetchBatchAggregationCodeGen(); } llvm::Value* nrows = builder.CreateZExt(nValues, int64Type); builder.CreateCall(func_prefetch, {Loc, phi_idx, nrows}); } /* * get the hashcell : cell = Loc[i] (see vnumeric_sum and vint8_sum) * and check if it is NULL */ tmpval = builder.CreateInBoundsGEP(Loc, phi_idx); cell = builder.CreateAlignedLoad(tmpval, 8, "hashCell"); tmpval = builder.CreatePtrToInt(cell, int64Type); tmpval = builder.CreateICmpEQ(tmpval, int64_0); builder.CreateCondBr(tmpval, for_inc, agg_bb[0]); /* loop over the numaggs */ int numSimpleVars = 0; for (i = 0; i < numaggs; i++) { llvm::BasicBlock* bisum_bb = NULL; llvm::BasicBlock* numsum_bb = NULL; /* the inverse order */ AggStatePerAgg peraggstate = &node->peragg[numaggs - i - 1]; aggref = (Aggref*)(peraggstate->aggref); ProjectionInfo* projInfo = (ProjectionInfo*)(peraggstate->evalproj); /* start the codegeneration for each aggregation */ builder.SetInsertPoint(agg_bb[i]); if (aggref->aggstage == 0) { if (aggref->aggfnoid != COUNTOID) { Assert(peraggstate->evalproj != NULL); AggrefExprState* aggexprstate = peraggstate->aggrefstate; /* check if current expression can be codegened in fast path or not */ estate = (ExprState*)linitial(aggexprstate->args); fast_aggref = AggRefFastJittable(estate); /* * Do not consider collection and finalization level for * numeric_avg to avoid deconstruct_array. 
*/ if (aggref->aggfnoid == NUMERICAVGFUNCOID && aggref->aggstage > 0) fast_aggref = false; /* If the current agg expression is just a simple var, * load it from the batch directly */ if (projInfo == NULL) { ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), errmodule(MOD_LLVM), errmsg("Unexpected NULL project information."))); } numSimpleVars = projInfo->pi_numSimpleVars; if (numSimpleVars > 0) { /* m_arr[varNumber].m_vals */ tmpval = batch_vals[i]; tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); result = builder.CreateAlignedLoad(tmpval, 8, "val"); /* m_arr[varNumber].m_flag */ tmpval = batch_flag[i]; tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); tmpval = builder.CreateAlignedLoad(tmpval, 1, "flag"); builder.CreateAlignedStore(tmpval, isNull, 1); } else { /* set the batch information : econtext->ecxt_outerbatch = batch */ Vals[0] = int64_0; Vals[1] = int32_pos_ecxt_outerbatch; llvm::Value* tmp_outerbatch = builder.CreateInBoundsGEP(econtext[i], Vals); builder.CreateAlignedStore(batch, tmp_outerbatch, 8); /* * If fast_aggref is true, we could try to evaluate the * expression value by using BI64 all the way, and turn * to original path once meet outofbound. 
*/ if (fast_aggref) { llvm::BasicBlock* bb_last = builder.GetInsertBlock(); DEFINE_BLOCK(bb_null, jitted_batchagg); DEFINE_BLOCK(bb_outofbound, jitted_batchagg); if (NULL == bisum_bb) { bisum_bb = llvm::BasicBlock::Create(context, "bisum_bb", jitted_batchagg); } if (NULL == numsum_bb) { numsum_bb = llvm::BasicBlock::Create(context, "numsum_bb", jitted_batchagg); } /* evaluate expression result */ llvm::Value* tmpexpres = EvalFastExprInBatchAgg(estate, builder, jitted_batchagg, &bb_null, &bb_last, &bb_outofbound, econtext[i], argVector, phi_idx); /* expres is already in {int16, int64} format */ builder.SetInsertPoint(bb_last); builder.CreateAlignedStore(int8_0, isNull, 1); llvm::Value* tmp_scale = builder.CreateExtractValue(tmpexpres, 0); llvm::Value* tmp_value = builder.CreateExtractValue(tmpexpres, 1); expres = llvm::UndefValue::get(SiNumeric64Type); expres = builder.CreateInsertValue(expres, tmp_scale, 0); expres = builder.CreateInsertValue(expres, tmp_value, 1); builder.CreateBr(bisum_bb); /* construct a null value, and no need to do aggregation */ builder.SetInsertPoint(bb_null); builder.CreateAlignedStore(int8_1, isNull, 1); if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* if result can not be represented in BI64, turn to * the original path */ builder.SetInsertPoint(bb_outofbound); /* Turn to per_tuple_memory to evaluate expression. 
*/ Vals[0] = int64_0; Vals[1] = int32_pos_ecxt_pertuple; llvm::Value* curr_Context = builder.CreateInBoundsGEP(econtext[i], Vals); curr_Context = builder.CreateAlignedLoad(curr_Context, 8, "per_tuple_memory"); llvm::Value* cg_oldContext = VecExprCodeGen::MemCxtSwitToCodeGen(&builder, curr_Context); result = EvalSimpleExprInBatchAgg(estate, builder, econtext[i], phi_idx, isNull); /* return back to the old memory context */ (void)VecExprCodeGen::MemCxtSwitToCodeGen(&builder, cg_oldContext); } else { /* * corresponding to ExecVecProject(peraggstate->evalproj) : * to evaluate expressions, we should turn to per_tuple_memory. */ Vals[0] = int64_0; Vals[1] = int32_pos_ecxt_pertuple; llvm::Value* curr_Context = builder.CreateInBoundsGEP(econtext[i], Vals); curr_Context = builder.CreateAlignedLoad(curr_Context, 8, "per_tuple_memory"); llvm::Value* cg_oldContext = VecExprCodeGen::MemCxtSwitToCodeGen(&builder, curr_Context); /* corresponding to ExecVecProject(peraggstate->evalproj) */ result = EvalSimpleExprInBatchAgg(estate, builder, econtext[i], phi_idx, isNull); /* return back to the old memory context */ (void)VecExprCodeGen::MemCxtSwitToCodeGen(&builder, cg_oldContext); } } } else { /* * When current stage is transaction and aggfnoid is COUNTOID, no need to * load any batch information. since we only need to plus one when cell * is not null. */ result = int64_0; builder.CreateAlignedStore(int8_0, isNull, 1); } } else { /* * When aggref->stage is not transiction, the aggref expr is always * be var, so get the value from batch directly (projInfo is not null). 
*/ if (projInfo != NULL) { int* varNumbers = projInfo->pi_varNumbers; int varNumber = varNumbers[0] - 1; /* m_arr[varNumber].m_vals */ Vals[0] = llvmCodeGen->getIntConstant(INT8OID, varNumber); Vals[1] = int32_pos_scalvec_vals; tmpval = builder.CreateInBoundsGEP(argVector, Vals); tmpval = builder.CreateAlignedLoad(tmpval, 8, "m_vals"); tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); result = builder.CreateAlignedLoad(tmpval, 8, "val"); /* m_arr[varNumber].m_flag */ Vals[1] = int32_pos_scalvec_flag; llvm::Value* argFlag = builder.CreateInBoundsGEP(argVector, Vals); argFlag = builder.CreateAlignedLoad(argFlag, 1, "m_flag"); tmpval = builder.CreateInBoundsGEP(argFlag, phi_idx); tmpval = builder.CreateAlignedLoad(tmpval, 1, "flag"); builder.CreateAlignedStore(tmpval, isNull, 1); } else { ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), errmodule(MOD_LLVM), errmsg("Unexpected NULL project information."))); } } /* Compute Aggregation */ if (aggref->aggfnoid == COUNTOID) { flag_then[i]->eraseFromParent(); flag_else[i]->eraseFromParent(); char* Jittedname = NULL; if (aggref->aggstage == 0) Jittedname = "Jitted_count_0"; else Jittedname = "Jitted_count_1"; llvm::Function* func_vcount = llvmCodeGen->module()->getFunction(Jittedname); if (NULL == func_vcount) { func_vcount = vec_count_codegen(aggref); } builder.CreateCall(func_vcount, {cell, aggIdxList[i], result}); if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); } else { /* * now we already get HashCell cell(cellval) and pVector(result), check * the flag and do aggregation. 
*/ /* see if IS_NULL(flag[phi_idx]) == false */ llvm::Value* tmpnull = builder.CreateAlignedLoad(isNull, 1, "tmpnull"); tmpnull = builder.CreateAnd(tmpnull, int8_1); llvm::Value* flag_cmp = builder.CreateICmpEQ(tmpnull, int8_0); builder.CreateCondBr(flag_cmp, flag_then[i], flag_else[i]); /* only do when not null */ builder.SetInsertPoint(flag_then[i]); switch (aggref->aggfnoid) { case INT8SUMFUNCOID: { llvm::Function* func_vint8sum = llvmCodeGen->module()->getFunction("Jitted_int8sum"); if (NULL == func_vint8sum) { func_vint8sum = int8_sum_codegen(aggref); } builder.CreateCall(func_vint8sum, {cell, hcxt, aggIdxList[i], result}); } break; case INT8AVGFUNCOID: { llvm::Function* func_vint8avg = llvmCodeGen->module()->getFunction("Jitted_int8avg"); if (NULL == func_vint8avg) { func_vint8avg = int8_avg_codegen(aggref); } builder.CreateCall(func_vint8avg, {cell, hcxt, aggIdxList[i], result}); } break; case NUMERICSUMFUNCOID: { /* * If aggref can be evaluated in fast path and just be * simple vars, use the result from batch. 
*/ if (fast_aggref && (numSimpleVars > 0)) { DEFINE_BLOCK(agg_then, jitted_batchagg); DEFINE_BLOCK(agg_else, jitted_batchagg); DEFINE_BLOCK(agg_end, jitted_batchagg); DEFINE_BLOCK(normal_bb, jitted_batchagg); DEFINE_BLOCK(var_bisum_bb, jitted_batchagg); DEFINE_BLOCK(var_numsum_bb, jitted_batchagg); /* get the hash val : hashCell->m_val[aggidx].val */ Vals4[0] = int64_0; Vals4[1] = int32_pos_hcell_mval; Vals4[2] = aggIdxList[i]; Vals4[3] = int32_0; llvm::Value* cellval = builder.CreateInBoundsGEP(cell, Vals4); /* get the flag of the hash cell and check if it is NULL */ Vals4[3] = int32_1; llvm::Value* cellflag = builder.CreateInBoundsGEP(cell, Vals4); tmpval = builder.CreateAlignedLoad(cellflag, 1, "cellFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* cell be null, add Variable */ builder.SetInsertPoint(agg_then); /* * should make a new context to record the result : the * following code corresponding to: * 'leftarg = DatumGetBINumeric(pVal[i]); * cell->m_val[idx].val = addVariable(context, NumericGetDatum(leftarg));'. */ tmpval = DatumGetBINumericCodeGen(&builder, result); tmpval = builder.CreatePtrToInt(tmpval, int64Type); tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, cellval, 8); builder.CreateAlignedStore(int8_0, cellflag, 1); /* turn to next basicblock or end this */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* cell be not null, do aggregation */ builder.SetInsertPoint(agg_else); /* * When fast_aggref is true and numSimpleVars is greater than zero, * the expr is numeric type var. Convert this numeric type data to * SiNumeric data to get the value. 
*/ llvm::Value* bires = DatumGetBINumericCodeGen(&builder, result); /* extract the header of result to check if it is BINumeric */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "biheader"); llvm::Value* rflag = builder.CreateAnd(tmpval, val_mask); /* extract the header of hashcell to check if it is BINumeric */ llvm::Value* real_cellval = builder.CreateAlignedLoad(cellval, 8, "cell_val"); llvm::Value* cellarg = builder.CreateIntToPtr(real_cellval, numericPtrType); tmpval = builder.CreateInBoundsGEP(cellarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "cellheader"); llvm::Value* lflag = builder.CreateAnd(tmpval, val_mask); /* check if either of them is not BI64 */ llvm::Value* oparg1 = builder.CreateICmpEQ(lflag, val_binum64); llvm::Value* oparg2 = builder.CreateICmpEQ(rflag, val_binum64); llvm::Value* bothbi64 = builder.CreateAnd(oparg1, oparg2); /* use fast path only when both args are bi64 */ builder.CreateCondBr(bothbi64, var_bisum_bb, var_numsum_bb); builder.SetInsertPoint(var_bisum_bb); /* extract the actual data of numeric only when cell is not null */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_1; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateBitCast(tmpval, int64PtrType); llvm::Value* resval = builder.CreateAlignedLoad(tmpval, 8, "value"); /* locate the restore value in hash cell by position */ llvm::Value* cell_addr = builder.CreateAdd(real_cellval, int64_6); cell_addr = builder.CreateIntToPtr(cell_addr, int64PtrType); llvm::Value* mid_cell_val = builder.CreateAlignedLoad(cell_addr, 8); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sadd_overflow == NULL) { ereport(ERROR, 
(errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sadd_overflow, {resval, mid_cell_val}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, var_numsum_bb, normal_bb); /* if meet overflow during aggregation, turn to original sum function. */ builder.SetInsertPoint(var_numsum_bb); llvm::Function* func_vnumericsum = llvmCodeGen->module()->getFunction("Jitted_numericsum"); if (NULL == func_vnumericsum) { func_vnumericsum = numeric_sum_codegen(aggref); } builder.CreateCall(func_vnumericsum, {cell, hcxt, aggIdxList[i], result}); builder.CreateBr(agg_end); builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, cell_addr, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else if (fast_aggref) { /* * If aggref can be evaluated in fast path and be numeric * expressions, use the result from fastexpr. */ Assert(bisum_bb != NULL); Assert(numsum_bb != NULL); DEFINE_BLOCK(agg_end, jitted_batchagg); DEFINE_BLOCK(agg_then, jitted_batchagg); DEFINE_BLOCK(agg_else, jitted_batchagg); DEFINE_BLOCK(normal_bb, jitted_batchagg); DEFINE_BLOCK(expr_bisum_bb, jitted_batchagg); DEFINE_BLOCK(bioverflow_bb, jitted_batchagg); /* * if the result of expression is outofbound, turn to * original numeric path. */ builder.CreateBr(numsum_bb); /* * if the result of expression is BI64, extract it. 
*/ builder.SetInsertPoint(bisum_bb); llvm::Value* resval = builder.CreateExtractValue(expres, 1); /* get the hash val : hashCell->m_val[aggidx].val */ Vals4[0] = int64_0; Vals4[1] = int32_pos_hcell_mval; Vals4[2] = aggIdxList[i]; Vals4[3] = int32_0; llvm::Value* cellval = builder.CreateInBoundsGEP(cell, Vals4); /* get the flag of the hash cell and check if it is NULL */ Vals4[3] = int32_1; llvm::Value* cellflag = builder.CreateInBoundsGEP(cell, Vals4); tmpval = builder.CreateAlignedLoad(cellflag, 1, "cellFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* cell be null, add Variable */ builder.SetInsertPoint(agg_then); /* get the aligned scale of this expression */ exprscale = GetAlignedScale(estate->expr); llvm::Value* alignedscale = llvmCodeGen->getIntConstant(CHAROID, exprscale); /* * should make a new context to record the result : the * following code corresponding to: * 'leftarg = DatumGetBINumeric(pVal[i]); * cell->m_val[idx].val = addVariable(context, NumericGetDatum(leftarg));'. 
*/ tmpval = WrapmakeNumeric64CodeGen(&builder, resval, alignedscale); tmpval = DatumGetBINumericCodeGen(&builder, tmpval); tmpval = builder.CreatePtrToInt(tmpval, int64Type); tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, cellval, 8); builder.CreateAlignedStore(int8_0, cellflag, 1); /* turn to next basicblock or end this */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* cell not be null, do aggregation */ builder.SetInsertPoint(agg_else); llvm::Value* real_cellval = builder.CreateAlignedLoad(cellval, 8, "cell_val"); /* first make sure the value in cell is BI64 format */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; llvm::Value* cellarg = builder.CreateIntToPtr(real_cellval, numericPtrType); tmpval = builder.CreateInBoundsGEP(cellarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "cellheader"); llvm::Value* biflag = builder.CreateAnd(tmpval, val_mask); llvm::Value* isbi64 = builder.CreateICmpEQ(biflag, val_binum64); builder.CreateCondBr(isbi64, expr_bisum_bb, bioverflow_bb); /* * do aggregation directly only when both expr value * and cell value is bi64. 
*/ builder.SetInsertPoint(expr_bisum_bb); llvm::Value* cell_ptr = builder.CreateAdd(real_cellval, int64_6); cell_ptr = builder.CreateIntToPtr(cell_ptr, int64PtrType); llvm::Value* mid_cell_val = builder.CreateAlignedLoad(cell_ptr, 8); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sadd_overflow, {resval, mid_cell_val}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, bioverflow_bb, normal_bb); builder.SetInsertPoint(bioverflow_bb); exprscale = GetAlignedScale(estate->expr); llvm::Value* ascale = llvmCodeGen->getIntConstant(CHAROID, exprscale); llvm::Value* bioverres = WrapmakeNumeric64CodeGen(&builder, resval, ascale); builder.CreateBr(numsum_bb); builder.SetInsertPoint(numsum_bb); llvm::PHINode* numres = builder.CreatePHI(int64Type, 2); numres->addIncoming(result, flag_then[i]); numres->addIncoming(bioverres, bioverflow_bb); llvm::Value* evalval = (llvm::Value*)numres; llvm::Function* func_vnumericsum = llvmCodeGen->module()->getFunction("Jitted_numericsum"); if (NULL == func_vnumericsum) { func_vnumericsum = numeric_sum_codegen(aggref); } builder.CreateCall(func_vnumericsum, {cell, hcxt, aggIdxList[i], evalval}); builder.CreateBr(agg_end); /* if there is no overflow, extract result directly */ builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, cell_ptr, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else { llvm::Function* func_vnumericsum = llvmCodeGen->module()->getFunction("Jitted_numericsum"); if (NULL == func_vnumericsum) { func_vnumericsum = 
numeric_sum_codegen(aggref); } builder.CreateCall(func_vnumericsum, {cell, hcxt, aggIdxList[i], result}); } } break; case NUMERICAVGFUNCOID: { /* * If aggref can be evaluated in fast path and just be * simple vars, use the result from batch. */ if (fast_aggref && (numSimpleVars > 0)) { DEFINE_BLOCK(agg_then, jitted_batchagg); DEFINE_BLOCK(agg_else, jitted_batchagg); DEFINE_BLOCK(agg_end, jitted_batchagg); DEFINE_BLOCK(normal_bb, jitted_batchagg); DEFINE_BLOCK(bisum_bblock, jitted_batchagg); DEFINE_BLOCK(numsum_bblock, jitted_batchagg); /* get the hash val : hashCell->m_val[aggidx].val */ Vals4[0] = int64_0; Vals4[1] = int32_pos_hcell_mval; Vals4[2] = aggIdxList[i]; Vals4[3] = int32_0; llvm::Value* cellval = builder.CreateInBoundsGEP(cell, Vals4); /* get the count of hash val : hashCell->m_val[aggidx + 1].val */ Vals4[2] = builder.CreateAdd(aggIdxList[i], int64_1, "val_plus"); llvm::Value* cellval2 = builder.CreateInBoundsGEP(cell, Vals4); /* get the flag of the hash cell and check if it is NULL */ Vals4[2] = aggIdxList[i]; Vals4[3] = int32_1; llvm::Value* cellflag = builder.CreateInBoundsGEP(cell, Vals4); /* get the flag of cell->m_val[idx + 1].flag */ Vals4[2] = builder.CreateAdd(aggIdxList[i], int64_1, "flag_plus"); llvm::Value* cellflag2 = builder.CreateInBoundsGEP(cell, Vals4); /* Now load the cell flag and check it */ tmpval = builder.CreateAlignedLoad(cellflag, 1, "cellFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* cell be null, add Variable */ builder.SetInsertPoint(agg_then); /* do leftarg = DatumGetBINumeric(pVal[i]) */ tmpval = DatumGetBINumericCodeGen(&builder, result); tmpval = builder.CreatePtrToInt(tmpval, int64Type); /* corresponding to addVariable(context, NumericGetDatum(leftarg)) */ tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, cellval, 8); /* count set to be one */ 
builder.CreateAlignedStore(int64_1, cellval2, 8); /* set cell flag */ builder.CreateAlignedStore(int8_0, cellflag, 1); builder.CreateAlignedStore(int8_0, cellflag2, 1); /* turn to next basicblock or end this */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* cell be not null, do aggregation */ builder.SetInsertPoint(agg_else); /* * When fast_aggref is true and numSimpleVars is greater than zero, * the expr is numeric type var. Convert this numeric type data to * SiNumeric data to get the value. */ llvm::Value* bires = DatumGetBINumericCodeGen(&builder, result); /* extract the header of result to check if it is BINumeric */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "biheader"); llvm::Value* rflag = builder.CreateAnd(tmpval, val_mask); /* extract the header of hashcell to check if it is BINumeric */ llvm::Value* real_cellval = builder.CreateAlignedLoad(cellval, 8, "cell_val"); llvm::Value* cellarg = builder.CreateIntToPtr(real_cellval, numericPtrType); tmpval = builder.CreateInBoundsGEP(cellarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "cellheader"); llvm::Value* lflag = builder.CreateAnd(tmpval, val_mask); /* check if either of them is not BI64 */ llvm::Value* oparg1 = builder.CreateICmpEQ(lflag, val_binum64); llvm::Value* oparg2 = builder.CreateICmpEQ(rflag, val_binum64); llvm::Value* bothbi64 = builder.CreateAnd(oparg1, oparg2); /* use fast path only when both args are bi64 */ builder.CreateCondBr(bothbi64, bisum_bblock, numsum_bblock); builder.SetInsertPoint(bisum_bblock); /* extract the actual data of numeric only when value is not null */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_1; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateBitCast(tmpval, int64PtrType); llvm::Value* resval = builder.CreateAlignedLoad(tmpval, 
8, "value"); llvm::Value* cell_addr = builder.CreateAdd(real_cellval, int64_6); cell_addr = builder.CreateIntToPtr(cell_addr, int64PtrType); llvm::Value* mid_cell_val = builder.CreateAlignedLoad(cell_addr, 8); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sadd_overflow, {resval, mid_cell_val}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, numsum_bblock, normal_bb); builder.SetInsertPoint(numsum_bblock); llvm::Function* func_vnumericavg = llvmCodeGen->module()->getFunction("Jitted_numericavg"); if (NULL == func_vnumericavg) { func_vnumericavg = numeric_avg_codegen(aggref); } builder.CreateCall(func_vnumericavg, {cell, hcxt, aggIdxList[i], result}); builder.CreateBr(agg_end); builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, cell_addr, 8); /* cell->m_val[idx+1].val++ */ tmpval = builder.CreateAlignedLoad(cellval2, 8, "count"); tmpval = builder.CreateAdd(tmpval, int64_1); builder.CreateAlignedStore(tmpval, cellval2, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else if (fast_aggref) { /* * If aggref can be evaluated in fast path and be numeric * expressions, use the result from fastexpr. 
*/ Assert(bisum_bb != NULL); Assert(numsum_bb != NULL); DEFINE_BLOCK(agg_end, jitted_batchagg); DEFINE_BLOCK(agg_then, jitted_batchagg); DEFINE_BLOCK(agg_else, jitted_batchagg); DEFINE_BLOCK(normal_bb, jitted_batchagg); DEFINE_BLOCK(expr_bisum_bb, jitted_batchagg); DEFINE_BLOCK(bioverflow_bb, jitted_batchagg); builder.CreateBr(numsum_bb); builder.SetInsertPoint(bisum_bb); llvm::Value* resval = builder.CreateExtractValue(expres, 1); /* get the hash val : hashCell->m_val[aggidx].val */ Vals4[0] = int64_0; Vals4[1] = int32_pos_hcell_mval; Vals4[2] = aggIdxList[i]; Vals4[3] = int32_0; llvm::Value* cellval = builder.CreateInBoundsGEP(cell, Vals4); /* get the count of hash val : hashCell->m_val[aggidx + 1].val */ Vals4[2] = builder.CreateAdd(aggIdxList[i], int64_1, "val_plus"); llvm::Value* cellval2 = builder.CreateInBoundsGEP(cell, Vals4); ; /* get the flag of the hash cell and check if it is NULL */ Vals4[2] = aggIdxList[i]; Vals4[3] = int32_1; llvm::Value* cellflag = builder.CreateInBoundsGEP(cell, Vals4); /* get the flag of cell->m_val[idx + 1].flag */ Vals4[2] = builder.CreateAdd(aggIdxList[i], int64_1, "flag_plus"); llvm::Value* cellflag2 = builder.CreateInBoundsGEP(cell, Vals4); tmpval = builder.CreateAlignedLoad(cellflag, 1, "cellFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* cell be null, add Variable */ builder.SetInsertPoint(agg_then); exprscale = GetAlignedScale(estate->expr); llvm::Value* alignedscale = llvmCodeGen->getIntConstant(CHAROID, exprscale); /* * should make a new context to record the result : the * following code corresponding to: * 'leftarg = DatumGetBINumeric(pVal[i]); * cell->m_val[idx].val = addVariable(context, NumericGetDatum(leftarg));'. 
*/ tmpval = WrapmakeNumeric64CodeGen(&builder, resval, alignedscale); tmpval = DatumGetBINumericCodeGen(&builder, tmpval); tmpval = builder.CreatePtrToInt(tmpval, int64Type); tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, cellval, 8); /* count set to be one */ builder.CreateAlignedStore(int64_1, cellval2, 8); /* set the flag of hashcell */ builder.CreateAlignedStore(int8_0, cellflag, 1); builder.CreateAlignedStore(int8_0, cellflag2, 1); /* turn to next basicblock */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); builder.SetInsertPoint(agg_else); llvm::Value* real_cellval = builder.CreateAlignedLoad(cellval, 8, "cell_val"); /* first make sure the value in cell is BI64 format */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; llvm::Value* cellarg = builder.CreateIntToPtr(real_cellval, numericPtrType); tmpval = builder.CreateInBoundsGEP(cellarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "cellheader"); llvm::Value* biflag = builder.CreateAnd(tmpval, val_mask); llvm::Value* isbi64 = builder.CreateICmpEQ(biflag, val_binum64); builder.CreateCondBr(isbi64, expr_bisum_bb, bioverflow_bb); /* * do aggregation directly only when both expr value * and cell value is bi64. 
*/ builder.SetInsertPoint(expr_bisum_bb); llvm::Value* cell_ptr = builder.CreateAdd(real_cellval, int64_6); cell_ptr = builder.CreateIntToPtr(cell_ptr, int64PtrType); llvm::Value* mid_cell_val = builder.CreateAlignedLoad(cell_ptr, 8); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sadd_overflow, {resval, mid_cell_val}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, bioverflow_bb, normal_bb); /* make numeric64 when meet overflow */ builder.SetInsertPoint(bioverflow_bb); exprscale = GetAlignedScale(estate->expr); llvm::Value* ascale = llvmCodeGen->getIntConstant(CHAROID, exprscale); llvm::Value* bioverres = WrapmakeNumeric64CodeGen(&builder, resval, ascale); builder.CreateBr(numsum_bb); builder.SetInsertPoint(numsum_bb); llvm::PHINode* numres = builder.CreatePHI(int64Type, 2); numres->addIncoming(result, flag_then[i]); numres->addIncoming(bioverres, bioverflow_bb); llvm::Value* evalval = (llvm::Value*)numres; llvm::Function* func_vnumericavg = llvmCodeGen->module()->getFunction("Jitted_numericavg"); if (NULL == func_vnumericavg) { func_vnumericavg = numeric_avg_codegen(aggref); } builder.CreateCall(func_vnumericavg, {cell, hcxt, aggIdxList[i], evalval}); builder.CreateBr(agg_end); /* if there is no overflow, extract result directly */ builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, cell_ptr, 8); /* cell->m_val[idx+1].val++ */ tmpval = builder.CreateAlignedLoad(cellval2, 8, "count"); tmpval = builder.CreateAdd(tmpval, int64_1); builder.CreateAlignedStore(tmpval, 
cellval2, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else { llvm::Function* func_vnumericavg = llvmCodeGen->module()->getFunction("Jitted_numericavg"); if (NULL == func_vnumericavg) { func_vnumericavg = numeric_avg_codegen(aggref); } builder.CreateCall(func_vnumericavg, {cell, hcxt, aggIdxList[i], result}); } } break; default: ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), errmodule(MOD_LLVM), errmsg("Unsupported agg function %u!", aggref->aggfnoid))); break; } if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* if the current flag is null, turn to next agg */ builder.SetInsertPoint(flag_else[i]); if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); } } /* codegen in the for_inc basic block: compare the loop index with nrows */ builder.SetInsertPoint(for_inc); tmpval = builder.CreateTrunc(idx_next, int32Type); tmpval = builder.CreateICmpEQ(tmpval, nValues); builder.CreateCondBr(tmpval, for_end, for_body); /* codegen in for_end basic block: just return void */ builder.SetInsertPoint(for_end); (void)VecExprCodeGen::MemCxtSwitToCodeGen(&builder, agg_oldcontext); WrapResetEContextCodeGen(&builder, mecontext); for (i = 0; i < numaggs; i++) { if (econtext[i]) WrapResetEContextCodeGen(&builder, econtext[i]); } builder.CreateRetVoid(); pfree_ext(aggIdxList); pfree_ext(agg_bb); pfree_ext(flag_then); pfree_ext(flag_else); pfree_ext(econtext); pfree_ext(batch_vals); pfree_ext(batch_flag); llvmCodeGen->FinalizeFunction(jitted_batchagg, node->ss.ps.plan->plan_node_id); return jitted_batchagg; }

/*
 * @Description	: Generate the LLVM IR function "JittedHashBatch", which hashes
 *				  one value of the idx-th grouping key.
 *				  The generated function has the prototype
 *					int32 JittedHashBatch(int64 value, int8 flag, int32 hash_val)
 *				  and behaves as follows:
 *				  - if the lowest bit of 'flag' is 0 (value is not null), the
 *					incoming 'hash_val' is combined with the value through the
 *					llvm_crc32_32_* macros, according to the key's type;
 *				  - otherwise the result is 0 when 'rehash' is false and the
 *					unchanged 'hash_val' when 'rehash' is true.
 * @in node		: VecAggState whose plan supplies the grouping columns and the
 *				  outer plan's target list.
 * @in idx		: Position of the key inside vecagg->grpColIdx.
 * @in rehash	: Decided at IR-generation time; selects the value returned for
 *				  a null input (see above).
 * @return		: The finalized LLVM function.
 * @Notes		: Supported key types are INT4, INT8, DATE, TIMESTAMP, BPCHAR,
 *				  VARCHAR and TEXT; any other type raises an ERROR.
 *				  The llvm_crc32_32_* macros are defined outside this chunk;
 *				  from their use here the first argument receives the new hash,
 *				  the second is the current hash and the third is the data
 *				  (TODO confirm against the macro definitions).
 */
llvm::Function* VecHashAggCodeGen::HashBatchCodeGen(VecAggState* node, int idx, bool rehash)
{
    GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;

    /* Find and load the IR file from the installation directory */
    llvmCodeGen->loadIRFile();

    /* Extract plan information from node */
    VecAgg* vecagg = (VecAgg*)(node->ss.ps.plan);
    List* tlist = (outerPlan(vecagg))->targetlist;
    AttrNumber* keyIdx = vecagg->grpColIdx;
    /* grpColIdx entries are 1-based attribute numbers, list_nth is 0-based */
    AttrNumber key = keyIdx[idx] - 1;
    TargetEntry* tentry = (TargetEntry*)list_nth(tlist, key);
    /* fixed data length of a bpchar key, only set when the key is a bpchar Var */
    int bpchar_len = 0;

    Assert(IsA(tentry->expr, Var) || IsA(tentry->expr, FuncExpr));

    /* Hash batch value just according to the return type. */
    Oid rettype = InvalidOid;
    switch (nodeTag(tentry->expr)) {
        case T_Var: {
            Var* var = (Var*)(tentry->expr);
            rettype = var->vartype;
            if (var->vartype == BPCHAROID)
                bpchar_len = var->vartypmod - VARHDRSZ;
        } break;
        case T_FuncExpr: {
            FuncExpr* funcexpr = (FuncExpr*)(tentry->expr);
            rettype = funcexpr->funcresulttype;
        } break;
        default:
            Assert(0);
            break;
    }

    /* Get LLVM Context and builder */
    llvm::LLVMContext& context = llvmCodeGen->context();
    GsCodeGen::LlvmBuilder builder(context);

    /* Define data types and some llvm consts */
    DEFINE_CG_TYPE(int8Type, CHAROID);
    DEFINE_CG_TYPE(int32Type, INT4OID);
    DEFINE_CG_TYPE(int64Type, INT8OID);
    DEFINE_CG_PTRTYPE(int8PtrType, CHAROID);
    DEFINE_CG_PTRTYPE(int16PtrType, INT2OID);
    DEFINE_CG_PTRTYPE(int32PtrType, INT4OID);
    DEFINE_CG_PTRTYPE(int64PtrType, INT8OID);
    DEFINE_CGVAR_INT8(int8_0, 0);
    DEFINE_CGVAR_INT8(int8_1, 1);
    DEFINE_CGVAR_INT32(int32_0, 0);
    DEFINE_CGVAR_INT32(int32_2, 2);
    DEFINE_CGVAR_INT32(int32_4, 4);
    DEFINE_CGVAR_INT64(Datum_0, 0);

    llvm::Function* jitted_hashbatch = NULL;
    llvm::Value* llvmargs[3];
    /* hash produced on the not-null path for every type except varchar/text */
    llvm::Value* hash_res1 = NULL;
    /* hash produced on the null path */
    llvm::Value* hash_res2 = NULL;
    /* hash produced on the not-null path for varchar/text (PHI in EQ0_bb) */
    llvm::Value* lt0_hash = NULL;
    /* last basic block of the varchar/text path, needed by the final PHI */
    llvm::BasicBlock* EQ0_bb = NULL;

    /* Function definition and input parameters */
    GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "JittedHashBatch", int32Type);
    fn_prototype.addArgument(GsCodeGen::NamedVariable("value", int64Type));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("flag", int8Type));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("hash_val", int32Type));
    jitted_hashbatch = fn_prototype.generatePrototype(&builder, &llvmargs[0]);

    llvm::Value* pval = llvmargs[0];
    llvm::Value* flag = llvmargs[1];
    llvm::Value* hash_val = llvmargs[2];

    llvm::BasicBlock* entry = &jitted_hashbatch->getEntryBlock();
    DEFINE_BLOCK(be_not_null, jitted_hashbatch);
    DEFINE_BLOCK(be_null, jitted_hashbatch);
    DEFINE_BLOCK(end_null, jitted_hashbatch);

    builder.SetInsertPoint(entry);

    /* check the current value is null or not: only the lowest bit of flag is tested */
    flag = builder.CreateAnd(flag, int8_1);
    llvm::Value* cmp = builder.CreateICmpEQ(flag, int8_0);
    builder.CreateCondBr(cmp, be_not_null, be_null);

    /*
     * corresponding to likely(NOT_NULL(flag[j])) branch in hashColT function.
     */
    builder.SetInsertPoint(be_not_null);
    /* NOTE(review): 'mod' is not referenced directly below; presumably the llvm_crc32_* macros use it - confirm */
    llvm::Module* mod = llvmCodeGen->module();
    switch (rettype) {
        case INT4OID: {
            /* only the low 32 bits of the datum carry the int4 value */
            hash_res1 = hash_val;
            pval = builder.CreateTrunc(pval, int32Type);
            llvm_crc32_32_32(hash_res1, hash_res1, pval);
        } break;
        case INT8OID:
        case DATEOID:
        case TIMESTAMPOID: {
            /* these types are hashed as one 64-bit value */
            hash_res1 = hash_val;
            llvm_crc32_32_64(hash_res1, hash_res1, pval);
        } break;
        case BPCHAROID: {
            /*
             * The length comes from the typmod and is known while generating
             * the IR, so the 8/4/2/1 byte steps below are unrolled here and
             * emit straight-line code without any branch or new basic block.
             */
            int len = bpchar_len;

            llvm::Function* func_evalvar = llvmCodeGen->module()->getFunction("JittedEvalVarlena");
            if (func_evalvar == NULL) {
                func_evalvar = VarlenaCvtCodeGen();
            }
            /* element 1 of the returned aggregate is used as the data pointer */
            llvm::Value* res = builder.CreateCall(func_evalvar, pval, "func_evalvar");
            llvm::Value* data1 = builder.CreateExtractValue(res, 1);
            data1 = builder.CreatePtrToInt(data1, int64Type);
            data1 = builder.CreateIntToPtr(data1, int8PtrType);

            hash_res1 = hash_val;
            if (len >= 8) {
                /* consume the data in 8-byte units; k counts the units emitted */
                int k = 0;
                llvm::Value* big_data = builder.CreateBitCast(data1, int64PtrType);
                llvm::Value* kidx = NULL;
                while (len >= 8) {
                    kidx = llvmCodeGen->getIntConstant(INT4OID, k);
                    pval = builder.CreateInBoundsGEP(big_data, kidx);
                    pval = builder.CreateAlignedLoad(pval, 8, "bigdat");
                    llvm_crc32_32_64(hash_res1, hash_res1, pval);
                    len = len - 8;
                    k++;
                }
                /* advance the byte pointer past the k * 8 bytes already hashed */
                kidx = llvmCodeGen->getIntConstant(INT4OID, k * 8);
                data1 = builder.CreateInBoundsGEP(data1, kidx);
            }

            if (len >= 4) {
                /* hash the next 4 bytes */
                llvm::Value* data = builder.CreateBitCast(data1, int32PtrType);
                pval = builder.CreateInBoundsGEP(data, Datum_0);
                pval = builder.CreateAlignedLoad(pval, 4, "intdat");
                llvm_crc32_32_32(hash_res1, hash_res1, pval);
                data1 = builder.CreateInBoundsGEP(data1, int32_4);
                len = len - 4;
            }

            if (len >= 2) {
                /* hash the next 2 bytes */
                llvm::Value* short_data = builder.CreateBitCast(data1, int16PtrType);
                pval = builder.CreateInBoundsGEP(short_data, Datum_0);
                pval = builder.CreateAlignedLoad(pval, 2, "shortdat");
                llvm_crc32_32_16(hash_res1, hash_res1, pval);
                data1 = builder.CreateInBoundsGEP(data1, int32_2);
                len = len - 2;
            }

            if (len == 1) {
                /* hash the last remaining byte */
                pval = builder.CreateAlignedLoad(data1, 1, "val_char");
                llvm_crc32_32_8(hash_res1, hash_res1, pval);
            }
        } break;
        case VARCHAROID:
        case TEXTOID: {
            /*
             * Different from bpchar type, we should create hash table according
             * to the actual length of varchar or text.
             * The length is only known when the generated code runs, so the
             * 8/4/2/1 byte steps are emitted as basic blocks joined by PHI nodes:
             *   be_not_null -> [GE8_bb loop -> end_GE8_bb] -> LT8_bb
             *               -> [GE4_bb] -> LT4_bb -> [GE2_bb] -> LT2_bb
             *               -> [EQ1_bb] -> EQ0_bb
             */
            llvm::Value* cmpval = NULL;
            llvm::Value* data = NULL;
            llvm::Value* nxt_len = NULL;
            llvm::Value* nxt_pos = NULL;
            llvm::Value* nxt_hash = NULL;

            /* these constants shadow the function-level int32_2 / int32_4 */
            DEFINE_CGVAR_INT32(int32_8, 8);
            DEFINE_CGVAR_INT32(int32_4, 4);
            DEFINE_CGVAR_INT32(int32_2, 2);
            DEFINE_CGVAR_INT32(int32_1, 1);

            DEFINE_BLOCK(GE8_bb, jitted_hashbatch);
            DEFINE_BLOCK(end_GE8_bb, jitted_hashbatch);
            DEFINE_BLOCK(LT8_bb, jitted_hashbatch);
            DEFINE_BLOCK(GE4_bb, jitted_hashbatch);
            DEFINE_BLOCK(LT4_bb, jitted_hashbatch);
            DEFINE_BLOCK(GE2_bb, jitted_hashbatch);
            DEFINE_BLOCK(LT2_bb, jitted_hashbatch);
            DEFINE_BLOCK(EQ1_bb, jitted_hashbatch);
            /* EQ0_bb lives at function scope because the final PHI in end_null refers to it */
            if (NULL == EQ0_bb) {
                EQ0_bb = llvm::BasicBlock::Create(context, "EQ0_bb", jitted_hashbatch);
            }

            /* get the initial data and true length */
            llvm::Function* func_evalvar = llvmCodeGen->module()->getFunction("JittedEvalVarlena");
            if (func_evalvar == NULL) {
                func_evalvar = VarlenaCvtCodeGen();
            }
            /* element 0 of the returned aggregate is used as the length, element 1 as the data pointer */
            llvm::Value* res = builder.CreateCall(func_evalvar, pval, "func_evalvar");
            llvm::Value* vlen = builder.CreateExtractValue(res, 0);
            llvm::Value* vdata = builder.CreateExtractValue(res, 1);
            vdata = builder.CreatePtrToInt(vdata, int64Type);
            vdata = builder.CreateIntToPtr(vdata, int8PtrType);

            /* set initial hash value */
            hash_res1 = hash_val;
            /* the loop carries the hash in a 64-bit PHI, so widen the seed */
            llvm::Value* bighash = builder.CreateZExt(hash_res1, int64Type);
            llvm::Value* bigdata = builder.CreateBitCast(vdata, int64PtrType);

            /* check if the actual length is at least 8 */
            cmpval = builder.CreateICmpSGE(vlen, int32_8, "if_ge8");
            builder.CreateCondBr(cmpval, GE8_bb, LT8_bb);

            /* loop over the length until it is less than 8 */
            builder.SetInsertPoint(GE8_bb);
            /* loop-carried values: remaining length, 8-byte unit index and hash */
            llvm::PHINode* phi_whl_len = builder.CreatePHI(int32Type, 2);
            llvm::PHINode* phi_whl_pos = builder.CreatePHI(int32Type, 2);
            llvm::PHINode* phi_whl_hash = builder.CreatePHI(int64Type, 2);
            llvm::Value* whl_len = (llvm::Value*)phi_whl_len;
            nxt_len = builder.CreateSub(whl_len, int32_8);
            phi_whl_len->addIncoming(vlen, be_not_null);
            phi_whl_len->addIncoming(nxt_len, GE8_bb);
            llvm::Value* whl_pos = (llvm::Value*)phi_whl_pos;
            nxt_pos = builder.CreateAdd(whl_pos, int32_1);
            phi_whl_pos->addIncoming(int32_0, be_not_null);
            phi_whl_pos->addIncoming(nxt_pos, GE8_bb);
            llvm::Value* whl_hash = (llvm::Value*)phi_whl_hash;
            whl_hash = builder.CreateTrunc(whl_hash, int32Type);

            /* compute hash value */
            llvm::Value* whl_data = builder.CreateInBoundsGEP(bigdata, whl_pos);
            whl_data = builder.CreateAlignedLoad(whl_data, 8, "whl_data");
            llvm_crc32_32_64(nxt_hash, whl_hash, whl_data);
            phi_whl_hash->addIncoming(bighash, be_not_null);
            nxt_hash = builder.CreateZExt(nxt_hash, int64Type);
            phi_whl_hash->addIncoming(nxt_hash, GE8_bb);

            /* iterate again while the remaining length is still at least 8 */
            cmpval = builder.CreateICmpSGE(nxt_len, int32_8);
            builder.CreateCondBr(cmpval, GE8_bb, end_GE8_bb);

            /* leave the loop: narrow the hash back to 32 bits and compute the next unread byte */
            builder.SetInsertPoint(end_GE8_bb);
            llvm::Value* ge8_len = nxt_len;
            llvm::Value* ge8_hash = builder.CreateTrunc(nxt_hash, int32Type);
            llvm::Value* ge8_data = builder.CreateInBoundsGEP(bigdata, nxt_pos);
            ge8_data = builder.CreateBitCast(ge8_data, int8PtrType);
            builder.CreateBr(LT8_bb);

            /* fewer than 8 bytes remain: merge the state from the direct path and the loop exit */
            builder.SetInsertPoint(LT8_bb);
            llvm::PHINode* phi_lt8_len = builder.CreatePHI(int32Type, 2);
            phi_lt8_len->addIncoming(vlen, be_not_null);
            phi_lt8_len->addIncoming(ge8_len, end_GE8_bb);
            llvm::Value* lt8_len = (llvm::Value*)phi_lt8_len;
            llvm::PHINode* phi_lt8_hash = builder.CreatePHI(int32Type, 2);
            phi_lt8_hash->addIncoming(hash_res1, be_not_null);
            phi_lt8_hash->addIncoming(ge8_hash, end_GE8_bb);
            llvm::Value* lt8_hash = (llvm::Value*)phi_lt8_hash;
            llvm::PHINode* phi_lt8_data = builder.CreatePHI(int8PtrType, 2);
            phi_lt8_data->addIncoming(vdata, be_not_null);
            phi_lt8_data->addIncoming(ge8_data, end_GE8_bb);
            llvm::Value* lt8_data = (llvm::Value*)phi_lt8_data;
            cmpval = builder.CreateICmpSGE(lt8_len, int32_4);
            builder.CreateCondBr(cmpval, GE4_bb, LT4_bb);

            /* if the remaining length is at least 4 (and less than 8) */
            builder.SetInsertPoint(GE4_bb);
            data = builder.CreateBitCast(lt8_data, int32PtrType);
            data = builder.CreateInBoundsGEP(data, Datum_0);
            data = builder.CreateAlignedLoad(data, 4, "intdat");
            llvm::Value* ge4_hash = NULL;
            llvm_crc32_32_32(ge4_hash, lt8_hash, data);
            llvm::Value* ge4_data = builder.CreateInBoundsGEP(lt8_data, int32_4);
            llvm::Value* ge4_len = builder.CreateSub(lt8_len, int32_4);
            builder.CreateBr(LT4_bb);

            /* if the remaining length is less than 4 */
            builder.SetInsertPoint(LT4_bb);
            llvm::PHINode* phi_lt4_len = builder.CreatePHI(int32Type, 2);
            phi_lt4_len->addIncoming(lt8_len, LT8_bb);
            phi_lt4_len->addIncoming(ge4_len, GE4_bb);
            llvm::Value* lt4_len = (llvm::Value*)phi_lt4_len;
            llvm::PHINode* phi_lt4_hash = builder.CreatePHI(int32Type, 2);
            phi_lt4_hash->addIncoming(lt8_hash, LT8_bb);
            phi_lt4_hash->addIncoming(ge4_hash, GE4_bb);
            llvm::Value* lt4_hash = (llvm::Value*)phi_lt4_hash;
            llvm::PHINode* phi_lt4_data = builder.CreatePHI(int8PtrType, 2);
            phi_lt4_data->addIncoming(lt8_data, LT8_bb);
            phi_lt4_data->addIncoming(ge4_data, GE4_bb);
            llvm::Value* lt4_data = (llvm::Value*)phi_lt4_data;
            cmpval = builder.CreateICmpSGE(lt4_len, int32_2);
            builder.CreateCondBr(cmpval, GE2_bb, LT2_bb);

            /* if the remaining length is at least 2 (and less than 4) */
            builder.SetInsertPoint(GE2_bb);
            data = builder.CreateBitCast(lt4_data, int16PtrType);
            data = builder.CreateInBoundsGEP(data, Datum_0);
            data = builder.CreateAlignedLoad(data, 2, "shortdat");
            llvm::Value* ge2_hash = NULL;
            llvm_crc32_32_16(ge2_hash, lt4_hash, data);
            llvm::Value* ge2_data = builder.CreateInBoundsGEP(lt4_data, int32_2);
            llvm::Value* ge2_len = builder.CreateSub(lt4_len, int32_2);
            builder.CreateBr(LT2_bb);

            /* if the remaining length is less than 2 */
            builder.SetInsertPoint(LT2_bb);
            llvm::PHINode* phi_lt2_len = builder.CreatePHI(int32Type, 2);
            phi_lt2_len->addIncoming(lt4_len, LT4_bb);
            phi_lt2_len->addIncoming(ge2_len, GE2_bb);
            llvm::Value* lt2_len = (llvm::Value*)phi_lt2_len;
            llvm::PHINode* phi_lt2_hash = builder.CreatePHI(int32Type, 2);
            phi_lt2_hash->addIncoming(lt4_hash, LT4_bb);
            phi_lt2_hash->addIncoming(ge2_hash, GE2_bb);
            llvm::Value* lt2_hash = (llvm::Value*)phi_lt2_hash;
            llvm::PHINode* phi_lt2_data = builder.CreatePHI(int8PtrType, 2);
            phi_lt2_data->addIncoming(lt4_data, LT4_bb);
            phi_lt2_data->addIncoming(ge2_data, GE2_bb);
            llvm::Value* lt2_data = (llvm::Value*)phi_lt2_data;
            cmpval = builder.CreateICmpEQ(lt2_len, int32_1);
            builder.CreateCondBr(cmpval, EQ1_bb, EQ0_bb);

            /* exactly one byte remains */
            builder.SetInsertPoint(EQ1_bb);
            data = builder.CreateAlignedLoad(lt2_data, 1, "val_char");
            llvm::Value* lt1_hash = NULL;
            llvm_crc32_32_8(lt1_hash, lt2_hash, data);
            builder.CreateBr(EQ0_bb);

            /* nothing remains: pick the final hash of the not-null path */
            builder.SetInsertPoint(EQ0_bb);
            llvm::PHINode* phi_lt0_hash = builder.CreatePHI(int32Type, 2);
            phi_lt0_hash->addIncoming(lt2_hash, LT2_bb);
            phi_lt0_hash->addIncoming(lt1_hash, EQ1_bb);
            lt0_hash = (llvm::Value*)phi_lt0_hash;
        } break;
        default:
            ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                    errmodule(MOD_LLVM),
                    errmsg("Type %u is not supported yet in hashBatch", rettype)));
            break;
    }
    /* the builder is still in be_not_null, or in EQ0_bb for varchar/text */
    builder.CreateBr(end_null);

    /*
     * corresponding to the else branch:
     * if (!rehash) { hashRes[j] = 0 }
     */
    builder.SetInsertPoint(be_null);
    if (!rehash)
        hash_res2 = int32_0;
    else
        hash_res2 = hash_val;
    builder.CreateBr(end_null);

    /*
     * Merge the two paths. The predecessor of the not-null path differs by
     * type: be_not_null for the straight-line cases, EQ0_bb for varchar/text.
     */
    builder.SetInsertPoint(end_null);
    if (rettype != VARCHAROID && rettype != TEXTOID) {
        llvm::PHINode* Phi_hash = builder.CreatePHI(int32Type, 2);
        Phi_hash->addIncoming(hash_res1,
be_not_null); Phi_hash->addIncoming(hash_res2, be_null); builder.CreateRet(Phi_hash); } else { llvm::PHINode* Phi_hash = builder.CreatePHI(int32Type, 2); Phi_hash->addIncoming(lt0_hash, EQ0_bb); Phi_hash->addIncoming(hash_res2, be_null); builder.CreateRet(Phi_hash); } llvmCodeGen->FinalizeFunction(jitted_hashbatch, node->ss.ps.plan->plan_node_id); return jitted_hashbatch; } llvm::Function* VecHashAggCodeGen::MatchOneKeyCodeGen(VecAggState* node, int idx) { GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; /* Find and load the IR file from the installaion directory */ llvmCodeGen->loadIRFile(); /* Extract plan information from node */ VecAgg* vecagg = (VecAgg*)(node->ss.ps.plan); List* tlist = (outerPlan(vecagg))->targetlist; AttrNumber* keyIdx = vecagg->grpColIdx; AttrNumber key = keyIdx[idx] - 1; TargetEntry* tentry = (TargetEntry*)list_nth(tlist, key); Assert(IsA(tentry->expr, Var) || IsA(tentry->expr, FuncExpr)); Oid rettype = InvalidOid; int bpchar_len = 0; switch (nodeTag(tentry->expr)) { case T_Var: { Var* var = (Var*)(tentry->expr); rettype = var->vartype; if (var->vartype == BPCHAROID) bpchar_len = var->vartypmod - VARHDRSZ; } break; case T_FuncExpr: { FuncExpr* funcexpr = (FuncExpr*)(tentry->expr); rettype = funcexpr->funcresulttype; } break; default: Assert(0); break; } Var* var = (Var*)(tentry->expr); /* Get LLVM Context and builder */ llvm::LLVMContext& context = llvmCodeGen->context(); GsCodeGen::LlvmBuilder builder(context); /* Define data types and some llvm consts */ DEFINE_CG_TYPE(int8Type, CHAROID); DEFINE_CG_TYPE(int32Type, INT4OID); DEFINE_CG_TYPE(int64Type, INT8OID); DEFINE_CG_PTRTYPE(hashCellPtrType, "struct.hashCell"); DEFINE_CGVAR_INT8(int8_0, 0); DEFINE_CGVAR_INT8(int8_1, 1); DEFINE_CGVAR_INT32(int32_0, 0); DEFINE_CGVAR_INT32(int32_1, 1); DEFINE_CGVAR_INT64(Datum_0, 0); DEFINE_CGVAR_INT64(Datum_1, 1); DEFINE_CGVAR_INT32(int32_pos_hcell_mval, pos_hcell_mval); llvm::Function* jitted_matchonekey = NULL; llvm::Value* 
tmpval = NULL; llvm::Value* llvmargs[4]; llvm::Value* Vals4[4] = {Datum_0, int32_0, int32_0, int32_0}; GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "JittedOneMatchKey", int64Type); fn_prototype.addArgument(GsCodeGen::NamedVariable("value", int64Type)); fn_prototype.addArgument(GsCodeGen::NamedVariable("flag", int8Type)); fn_prototype.addArgument(GsCodeGen::NamedVariable("hashcell", hashCellPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("keycell_idx", int32Type)); jitted_matchonekey = fn_prototype.generatePrototype(&builder, &llvmargs[0]); llvm::Value* pval = llvmargs[0]; llvm::Value* pflg = llvmargs[1]; llvm::Value* hashcell = llvmargs[2]; llvm::Value* keyidxincell = llvmargs[3]; DEFINE_BLOCK(bnot_null, jitted_matchonekey); DEFINE_BLOCK(may_null, jitted_matchonekey); DEFINE_BLOCK(both_null, jitted_matchonekey); DEFINE_BLOCK(one_null, jitted_matchonekey); DEFINE_BLOCK(check_end, jitted_matchonekey); keyidxincell = builder.CreateSExt(keyidxincell, int64Type); /* hashCell.m_val[keyidxincell].val / flag */ Vals4[0] = Datum_0; Vals4[1] = int32_pos_hcell_mval; Vals4[2] = keyidxincell; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(hashcell, Vals4); llvm::Value* keyval = builder.CreateAlignedLoad(tmpval, 8, "keyincell"); Vals4[3] = int32_1; tmpval = builder.CreateInBoundsGEP(hashcell, Vals4); llvm::Value* keyflg = builder.CreateAlignedLoad(tmpval, 1, "flagincell"); llvm::Value* cmp1 = NULL; llvm::Value* cmp2 = NULL; llvm::Value* cmpand = NULL; llvm::Value* cmpor = NULL; llvm::Value* res1 = NULL; llvm::Value* res2 = NULL; llvm::Value* res3 = NULL; pflg = builder.CreateAnd(pflg, int8_1); cmp1 = builder.CreateICmpEQ(pflg, int8_0); keyflg = builder.CreateAnd(keyflg, int8_1); cmp2 = builder.CreateICmpEQ(keyflg, int8_0); cmpand = builder.CreateAnd(cmp1, cmp2); cmpor = builder.CreateOr(cmp1, cmp2); builder.CreateCondBr(cmpand, bnot_null, may_null); builder.SetInsertPoint(bnot_null); switch (rettype) { case INT4OID: { /* * should first truncate the 
keyval to make sure we compare the * right value */ keyval = builder.CreateTrunc(keyval, int32Type); pval = builder.CreateTrunc(pval, int32Type); res1 = builder.CreateICmpEQ(keyval, pval); res1 = builder.CreateZExt(res1, int64Type); } break; case INT8OID: case DATEOID: case TIMESTAMPOID: { res1 = builder.CreateICmpEQ(keyval, pval); res1 = builder.CreateZExt(res1, int64Type); } break; case BPCHAROID: { /* first extract the char* value from Datum */ llvm::Function* func_evalvar = llvmCodeGen->module()->getFunction("JittedEvalVarlena"); if (func_evalvar == NULL) { func_evalvar = dorado::VarlenaCvtCodeGen(); } llvm::Value* vecval = builder.CreateCall(func_evalvar, pval, "evalbatchvar"); llvm::Value* vecdata = builder.CreateExtractValue(vecval, 1); llvm::Value* cellval = builder.CreateCall(func_evalvar, keyval, "evalcellvar"); llvm::Value* celldata = builder.CreateExtractValue(cellval, 1); /* call simple memcmp IR function */ llvm::Function* evalmemcmp = llvmCodeGen->module()->getFunction("LLVMIRmemcmp"); if (evalmemcmp == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_IR_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Failed on getting IR function : LLVMIRmemcmp!\n"))); } llvm::Value* lendat = llvmCodeGen->getIntConstant(INT4OID, bpchar_len); res1 = builder.CreateCall(evalmemcmp, {celldata, vecdata, lendat}, "memcmp"); } break; case TEXTOID: case VARCHAROID: { llvm::Function* func_evalvar = llvmCodeGen->module()->getFunction("JittedEvalVarlena"); if (func_evalvar == NULL) { func_evalvar = VarlenaCvtCodeGen(); } llvm::Value* vecval = builder.CreateCall(func_evalvar, pval, "evalbatchvar"); llvm::Value* veclen = builder.CreateExtractValue(vecval, 0); llvm::Value* vecdata = builder.CreateExtractValue(vecval, 1); llvm::Value* cellval = builder.CreateCall(func_evalvar, keyval, "evalcellvar"); llvm::Value* celllen = builder.CreateExtractValue(cellval, 0); llvm::Value* celldata = builder.CreateExtractValue(cellval, 1); llvm::Function* func_texteq_cc = 
llvmCodeGen->module()->getFunction("LLVMIRtexteq"); if (func_texteq_cc == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_IR_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Failed on getting IR function : LLVMIRtexteq!\n"))); } res1 = builder.CreateCall(func_texteq_cc, {veclen, vecdata, celllen, celldata}, "texteq"); } break; default: ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmodule(MOD_LLVM), (errmsg("Type %u is not supported yet in match_key", var->vartype)))); break; } builder.CreateBr(check_end); builder.SetInsertPoint(may_null); builder.CreateCondBr(cmpor, one_null, both_null); /* both null is equal */ builder.SetInsertPoint(both_null); res2 = Datum_1; builder.CreateBr(check_end); /* null not equal to non-null */ builder.SetInsertPoint(one_null); res3 = Datum_0; builder.CreateBr(check_end); builder.SetInsertPoint(check_end); llvm::PHINode* Phi_ret = builder.CreatePHI(int64Type, 3); Phi_ret->addIncoming(res1, bnot_null); Phi_ret->addIncoming(res2, both_null); Phi_ret->addIncoming(res3, one_null); builder.CreateRet(Phi_ret); llvmCodeGen->FinalizeFunction(jitted_matchonekey, node->ss.ps.plan->plan_node_id); return jitted_matchonekey; } llvm::Value* VecHashAggCodeGen::EvalFastExprInBatchAgg(ExprState* state, GsCodeGen::LlvmBuilder builder, llvm::Function* jitted_func, llvm::BasicBlock** bb_null, llvm::BasicBlock** bb_last, llvm::BasicBlock** bb_outofbound, llvm::Value* econtext, llvm::Value* argVector, llvm::Value* phi_idx) { dorado::GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; llvm::Module* mod = llvmCodeGen->module(); llvm::LLVMContext& context = llvmCodeGen->context(); DEFINE_CG_TYPE(int16Type, INT2OID); DEFINE_CG_TYPE(int64Type, INT8OID); DEFINE_CG_PTRTYPE(int64PtrType, INT8OID); DEFINE_CGVAR_INT8(int8_0, 0); DEFINE_CGVAR_INT8(int8_1, 1); DEFINE_CGVAR_INT16(int16_0, 0); DEFINE_CGVAR_INT16(val_numeric64, NUMERIC_64); DEFINE_CGVAR_INT16(val_bimask, NUMERIC_BI_MASK); DEFINE_CGVAR_INT16(val_scalemask, 
NUMERIC_BI_SCALEMASK); DEFINE_CGVAR_INT32(int32_pos_scalvec_vals, pos_scalvec_vals); DEFINE_CGVAR_INT32(int32_pos_scalvec_flag, pos_scalvec_flag); DEFINE_CGVAR_INT32(int32_0, 0); DEFINE_CGVAR_INT32(int32_1, 1); DEFINE_CGVAR_INT64(int64_0, 0); llvm::Value* tmpval = NULL; llvm::Value* cmpval = NULL; llvm::Value* phi_val = NULL; llvm::Value* result = NULL; llvm::Value* multi_bound = NULL; llvm::Value* left_scaled1 = NULL; llvm::Value* left_scaled2 = NULL; llvm::Value* right_scaled1 = NULL; llvm::Value* right_scaled2 = NULL; llvm::Value* resscale1 = NULL; llvm::Value* resscale2 = NULL; llvm::Value* res1 = NULL; llvm::Value* lval = NULL; llvm::Value* rval = NULL; llvm::PHINode* left_scaled = NULL; llvm::PHINode* right_scaled = NULL; llvm::PHINode* resscale = NULL; llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Value* Vals[2] = {int64_0, int32_0}; llvm::Value* Vals4[4] = {int64_0, int32_0, int32_0, int32_0}; /* create LLVM value with {uint16, int64} format */ llvm::Type* Elements[] = {int16Type, int64Type}; llvm::Type* SiNumeric64Type = llvm::StructType::create(context, Elements, "SiNumeric64"); if (*bb_last) builder.SetInsertPoint(*bb_last); switch (nodeTag(state->expr)) { case T_TargetEntry: { /* TargetEntry information is stored in GenericExprState */ GenericExprState* gstate = (GenericExprState*)state; ExprState* estate = gstate->arg; result = EvalFastExprInBatchAgg( estate, builder, jitted_func, bb_null, bb_last, bb_outofbound, econtext, argVector, phi_idx); } break; case T_OpExpr: { /* define the basic block needed here */ DEFINE_BLOCK(both_bi64, jitted_func); DEFINE_BLOCK(not_both_bi64, jitted_func); /* check if have special opexpr with one arg */ OpExpr* opexpr = (OpExpr*)state->expr; FuncExprState* fstate = (FuncExprState*)state; List* op_args = fstate->args; ExprState* lestate = (ExprState*)linitial(op_args); ExprState* restate = (ExprState*)lsecond(op_args); lval = EvalFastExprInBatchAgg( lestate, builder, jitted_func, bb_null, bb_last, bb_outofbound, 
econtext, argVector, phi_idx); rval = EvalFastExprInBatchAgg( restate, builder, jitted_func, bb_null, bb_last, bb_outofbound, econtext, argVector, phi_idx); if (*bb_last) builder.SetInsertPoint(*bb_last); /* Extract the header value to get the mask and scale */ llvm::Value* lheader = builder.CreateExtractValue(lval, 0); llvm::Value* rheader = builder.CreateExtractValue(rval, 0); llvm::Value* lvalscale = builder.CreateAnd(lheader, val_scalemask); llvm::Value* rvalscale = builder.CreateAnd(rheader, val_scalemask); llvm::Value* lvalmask = builder.CreateAnd(lheader, val_bimask); llvm::Value* lvalbi64 = builder.CreateICmpEQ(lvalmask, val_numeric64); llvm::Value* rvalmask = builder.CreateAnd(rheader, val_bimask); llvm::Value* rvalbi64 = builder.CreateICmpEQ(rvalmask, val_numeric64); llvm::Value* bebi64 = builder.CreateAnd(lvalbi64, rvalbi64); builder.CreateCondBr(bebi64, both_bi64, not_both_bi64); builder.SetInsertPoint(both_bi64); llvm::Value* leftval = builder.CreateExtractValue(lval, 1); llvm::Value* rightval = builder.CreateExtractValue(rval, 1); /* * adjust the scale and value, first define outofbound block */ if (opexpr->opno == NUMERICADDOID || opexpr->opno == NUMERICSUBOID) { /* Define basic block that needed only by NUMERICADDOID and NUMERICSUBOID*/ DEFINE_BLOCK(delta_large, jitted_func); DEFINE_BLOCK(delta_small, jitted_func); DEFINE_BLOCK(adjust_true, jitted_func); DEFINE_BLOCK(adjust_overflow_bb, jitted_func); DEFINE_BLOCK(adjust_normal_bb, jitted_func); llvm::Value* delta_scale = builder.CreateSub(lvalscale, rvalscale, "delta_scale"); /* check delta_scale >= 0 or not */ cmpval = builder.CreateICmpSGE(delta_scale, int16_0, "cmp_delta_scale"); builder.CreateCondBr(cmpval, delta_large, delta_small); /* corresponding to delta_scale >= 0 */ builder.SetInsertPoint(delta_large); /* tmpval = y < 0 ? 
y : -y */ tmpval = builder.CreateSub(int64_0, rightval); cmpval = builder.CreateICmpSLT(rightval, int64_0, "negative_cmp"); phi_val = builder.CreateSelect(cmpval, rightval, tmpval); left_scaled1 = leftval; llvm::Value* mulscale = ScaleMultiCodeGen(&builder, delta_scale); /* corresponding to y_scaled = y * ScaleMultipler[delta_scale] */ right_scaled1 = builder.CreateMul(rightval, mulscale); /* result_scale = x_scale */ resscale1 = lvalscale; multi_bound = GetInt64MulOutofBoundCodeGen(&builder, delta_scale); cmpval = builder.CreateICmpSGE(phi_val, multi_bound, "bound_check"); builder.CreateCondBr(cmpval, adjust_true, *bb_outofbound); builder.SetInsertPoint(delta_small); /* tmpval = x < 0 ? x : -x */ tmpval = builder.CreateSub(int64_0, leftval); cmpval = builder.CreateICmpSLT(leftval, int64_0, "negative_cmp"); phi_val = builder.CreateSelect(cmpval, leftval, tmpval); /* corresponding to x_scaled = x * ScaleMultipler[-delta_scale] */ llvm::Value* mdelta_scale = builder.CreateSub(int16_0, delta_scale); llvm::Value* mmulscale = ScaleMultiCodeGen(&builder, mdelta_scale); left_scaled2 = builder.CreateMul(leftval, mmulscale); right_scaled2 = rightval; /* result_scale = y_scale */ resscale2 = rvalscale; multi_bound = GetInt64MulOutofBoundCodeGen(&builder, mdelta_scale); cmpval = builder.CreateICmpSGE(phi_val, multi_bound, "bound_check"); builder.CreateCondBr(cmpval, adjust_true, *bb_outofbound); builder.SetInsertPoint(adjust_true); left_scaled = builder.CreatePHI(int64Type, 2); right_scaled = builder.CreatePHI(int64Type, 2); resscale = builder.CreatePHI(int16Type, 2); left_scaled->addIncoming(left_scaled1, delta_large); left_scaled->addIncoming(left_scaled2, delta_small); right_scaled->addIncoming(right_scaled1, delta_large); right_scaled->addIncoming(right_scaled2, delta_small); resscale->addIncoming(resscale1, delta_large); resscale->addIncoming(resscale2, delta_small); switch (opexpr->opno) { case NUMERICADDOID: { /* do bi64 add bi64, need to check if there is any overflow 
*/ llvm::Function* func_sadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* res = builder.CreateCall(func_sadd_overflow, {left_scaled, right_scaled}, "sadd"); llvm::Value* overflow_flag = builder.CreateExtractValue(res, 1); builder.CreateCondBr(overflow_flag, adjust_overflow_bb, adjust_normal_bb); builder.SetInsertPoint(adjust_normal_bb); res1 = builder.CreateExtractValue(res, 0); /* create the result of current step */ result = llvm::UndefValue::get(SiNumeric64Type); llvm::Value* tmphead = builder.CreateAdd(val_numeric64, resscale); result = builder.CreateInsertValue(result, tmphead, 0); result = builder.CreateInsertValue(result, res1, 1); *bb_last = adjust_normal_bb; /* adjust true without out_of_bound */ builder.SetInsertPoint(adjust_overflow_bb); builder.CreateBr(*bb_outofbound); } break; case NUMERICSUBOID: { /* do bi64 add bi64, need to check if there is any overflow */ llvm::Function* func_ssub_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::ssub_with_overflow, Intrinsic_Tys); if (func_ssub_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sasub_with_overflow function!\n"))); } llvm::Value* res = builder.CreateCall(func_ssub_overflow, {left_scaled, right_scaled}, "sadd"); llvm::Value* overflow_flag = builder.CreateExtractValue(res, 1); builder.CreateCondBr(overflow_flag, adjust_overflow_bb, adjust_normal_bb); builder.SetInsertPoint(adjust_normal_bb); /* if not out of bound, return makeNumeric64(result, resScale) */ res1 = builder.CreateExtractValue(res, 0); /* create the result of current step */ result = llvm::UndefValue::get(SiNumeric64Type); llvm::Value* tmphead = 
builder.CreateAdd(val_numeric64, resscale); result = builder.CreateInsertValue(result, tmphead, 0); result = builder.CreateInsertValue(result, res1, 1); *bb_last = adjust_normal_bb; /* if overflow, turn to int128 type first */ builder.SetInsertPoint(adjust_overflow_bb); builder.CreateBr(*bb_outofbound); } break; default: Assert(0); break; } } else if (opexpr->opno == NUMERICMULOID) { DEFINE_BLOCK(mul64, jitted_func); DEFINE_BLOCK(mul128, jitted_func); DEFINE_CGVAR_INT16(maxInt64digitsNum, MAXINT64DIGIT); llvm::Value* res_scale = builder.CreateAdd(lvalscale, rvalscale, "res_scale"); /* check delta_scale >= MAX DIGITS INT64 NUMBER or not */ cmpval = builder.CreateICmpSLE(res_scale, maxInt64digitsNum, "cmp_delta_scale"); /* do bi64 mul bi64, need to check if there is any overflow */ llvm::Function* func_smul_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::smul_with_overflow, Intrinsic_Tys); if (func_smul_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::smul_with_overflow function!"))); } llvm::Value* res = builder.CreateCall(func_smul_overflow, {leftval, rightval}, "smul"); llvm::Value* overflow_flag = builder.CreateExtractValue(res, 1); llvm::Value* res_64 = builder.CreateExtractValue(res, 0); cmpval = builder.CreateAnd(cmpval, builder.CreateNot(overflow_flag)); builder.CreateCondBr(cmpval, mul64, mul128); builder.SetInsertPoint(mul64); res1 = res_64; /* create the result of current step */ result = llvm::UndefValue::get(SiNumeric64Type); llvm::Value* tmphead = builder.CreateAdd(val_numeric64, res_scale); result = builder.CreateInsertValue(result, tmphead, 0); result = builder.CreateInsertValue(result, res_64, 1); *bb_last = mul64; builder.SetInsertPoint(mul128); builder.CreateBr(*bb_outofbound); } else { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmodule(MOD_LLVM), errmsg("Unexpected operation %u!", opexpr->opno))); } /* * when one of 
the argument is bi128 : note that at most * one of the argument is bi128. */ builder.SetInsertPoint(not_both_bi64); builder.CreateBr(*bb_outofbound); } break; case T_Var: { /* Extract the variable from the batch */ Var* var = (Var*)(state->expr); llvm::Value* m_attno = llvmCodeGen->getIntConstant(INT8OID, var->varattno - 1); Vals[0] = m_attno; Vals[1] = int32_pos_scalvec_vals; tmpval = builder.CreateInBoundsGEP(argVector, Vals); tmpval = builder.CreateAlignedLoad(tmpval, 8, "m_vals"); tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); llvm::Value* res = builder.CreateAlignedLoad(tmpval, 8, "val"); Vals[1] = int32_pos_scalvec_flag; tmpval = builder.CreateInBoundsGEP(argVector, Vals); tmpval = builder.CreateAlignedLoad(tmpval, 8, "m_flag"); tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); llvm::Value* flg = builder.CreateAlignedLoad(tmpval, 1, "flag"); /* make sure the value is null or not */ llvm::Value* tmpNull = builder.CreateAnd(flg, int8_1); tmpval = builder.CreateICmpEQ(tmpNull, int8_0); DEFINE_BLOCK(var_bb, jitted_func); builder.CreateCondBr(tmpval, var_bb, *bb_null); builder.SetInsertPoint(var_bb); Assert(NUMERICOID == var->vartype); /* Since var is numeric type, turn numeric data to BInumeric */ result = llvm::UndefValue::get(SiNumeric64Type); llvm::Value* bires = DatumGetBINumericCodeGen(&builder, res); /* Extract the header data */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "header"); result = builder.CreateInsertValue(result, tmpval, 0); Vals4[3] = int32_1; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateBitCast(tmpval, int64PtrType); tmpval = builder.CreateAlignedLoad(tmpval, 8, "value"); result = builder.CreateInsertValue(result, tmpval, 1); *bb_last = var_bb; return result; } break; case T_Const: { Const* cst = (Const*)(state->expr); ScalarValue val = 
ScalarVector::DatumToScalar(cst->constvalue, cst->consttype, cst->constisnull); Assert(NUMERICOID == cst->consttype); /* The const value will not be NULL */ uint16 header; Datum numval; /* turn to {uint16, int64} format(Simple Numeric Type) */ /* First store mark bits and scale of big integer: * first 4 bits to distinguish bi64 and bi128, next * 4 bits are not used, the last 8 bits store the scale of bit integer */ Numeric arg = DatumGetBINumeric(val); header = arg->choice.n_header; numval = *(int64*)(arg->choice.n_bi.n_data); result = llvm::UndefValue::get(SiNumeric64Type); llvm::Value* hed = llvmCodeGen->getIntConstant(INT2OID, header); llvm::Value* res = llvmCodeGen->getIntConstant(INT8OID, numval); result = builder.CreateInsertValue(result, hed, 0); result = builder.CreateInsertValue(result, res, 1); return result; } break; default: Assert(0); break; } return result; } llvm::Value* VecHashAggCodeGen::EvalSimpleExprInBatchAgg( ExprState* state, GsCodeGen::LlvmBuilder builder, llvm::Value* econtext, llvm::Value* phi_idx, llvm::Value* isNull) { llvm::Value* result = NULL; switch (nodeTag(state->expr)) { case T_TargetEntry: { /* TargetEntry information is stored in GenericExprState */ GenericExprState* gstate = (GenericExprState*)state; ExprState* estate = gstate->arg; result = EvalSimpleExprInBatchAgg(estate, builder, econtext, phi_idx, isNull); } break; case T_Var: case T_Const: case T_OpExpr: { /* define llvmargs */ llvm::Value* llvmargs[3]; llvmargs[0] = econtext; llvmargs[1] = isNull; llvmargs[2] = phi_idx; /* * Prepare the parameters that needed by OpCodeGen. 
*/ ExprCodeGenArgs args; args.exprstate = state; args.parent = NULL; args.builder = &builder; args.llvm_args = &llvmargs[0]; result = dorado::VecExprCodeGen::CodeGen(&args); } break; default: Assert(0); break; } return result; } llvm::Function* VecHashAggCodeGen::SonicBatchAggregationCodeGen(VecAggState* node, bool use_prefetch) { /* First get the basic information of VecAggState */ int numaggs = node->numaggs; Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj); GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; if (!BatchAggJittable(node, true)) return NULL; /* Find and load the IR file from the installaion directory */ llvmCodeGen->loadIRFile(); /* Get LLVM Context and builder */ llvm::LLVMContext& context = llvmCodeGen->context(); GsCodeGen::LlvmBuilder builder(context); llvm::Module* mod = llvmCodeGen->module(); int i; int exprscale = 0; bool fast_aggref = false; ExprState* estate = NULL; llvm::Value* nValues = NULL; llvm::Value* tmpval = NULL; llvm::Value* idx_next = NULL; llvm::Value* locidx = NULL; llvm::Value* result = NULL; llvm::Value* expres = NULL; llvm::Value* nbit = NULL; llvm::Value* arrIdx = NULL; llvm::Value* atomIdx = NULL; llvm::Value* atom = NULL; llvm::Value* cntatom = NULL; llvm::Value* atomsize = NULL; llvm::Value* data = NULL; llvm::Value* cntdata = NULL; llvm::Value* scount = NULL; llvm::Value* llvmargs[3]; Aggref* aggref = NULL; llvm::Function* jitted_sonicbatchagg = NULL; /* Define data types and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_TYPE(int8Type, CHAROID); DEFINE_CG_TYPE(int16Type, INT2OID); DEFINE_CG_TYPE(int32Type, INT4OID); DEFINE_CG_TYPE(int64Type, INT8OID); DEFINE_CG_PTRTYPE(int16PtrType, INT2OID); DEFINE_CG_PTRTYPE(int64PtrType, INT8OID); DEFINE_CG_PTRTYPE(ExprContextPtrType, "struct.ExprContext"); DEFINE_CG_PTRTYPE(vectorBatchPtrType, "class.VectorBatch"); DEFINE_CG_PTRTYPE(sonicEncodingDatumArrayPtrType, "class.SonicEncodingDatumArray"); DEFINE_CG_PTRTYPE(numericPtrType, 
"struct.NumericData"); DEFINE_CG_PTRTYPE(sonicHashAggPtrType, "class.SonicHashAgg"); /* create LLVM value with {uint16, int64} format type */ llvm::Type* Elements[] = {int16Type, int64Type}; llvm::Type* SiNumeric64Type = llvm::StructType::create(context, Elements, "SiNumeric64"); DEFINE_CGVAR_INT8(int8_0, 0); DEFINE_CGVAR_INT8(int8_1, 1); DEFINE_CGVAR_INT16(val_mask, NUMERIC_BI_MASK); DEFINE_CGVAR_INT16(val_binum64, NUMERIC_64); DEFINE_CGVAR_INT32(int32_0, 0); DEFINE_CGVAR_INT32(int32_1, 1); DEFINE_CGVAR_INT64(int64_0, 0); DEFINE_CGVAR_INT64(int64_1, 1); DEFINE_CGVAR_INT64(int64_6, 6); DEFINE_CGVAR_INT32(int32_pos_batch_marr, pos_batch_marr); DEFINE_CGVAR_INT32(int32_pos_scalvec_vals, pos_scalvec_vals); DEFINE_CGVAR_INT32(int32_pos_scalvec_flag, pos_scalvec_flag); DEFINE_CGVAR_INT32(int32_pos_ecxt_pertuple, pos_ecxt_pertuple); DEFINE_CGVAR_INT32(int32_pos_ecxt_outerbatch, pos_ecxt_outerbatch); DEFINE_CGVAR_INT32(int32_pos_shash_data, pos_shash_data); DEFINE_CGVAR_INT32(int32_pos_shash_sonichmemctl, pos_shash_sonichmemctl); DEFINE_CGVAR_INT32(int32_pos_shash_loc, pos_shash_loc); DEFINE_CGVAR_INT32(int32_pos_sonichmemctl_hcxt, pos_sonichmemctl_hcxt); DEFINE_CGVAR_INT32(int32_pos_shashagg_ecxt, pos_shashagg_ecxt); DEFINE_CGVAR_INT32(int32_pos_sdarray_nbit, pos_sdarray_nbit); DEFINE_CGVAR_INT32(int32_pos_sdarray_atomsize, pos_sdarray_atomsize); DEFINE_CGVAR_INT32(int32_pos_sdarray_arr, pos_sdarray_arr); DEFINE_CGVAR_INT32(int32_pos_atom_data, pos_atom_data); DEFINE_CGVAR_INT32(int32_pos_atom_nullflag, pos_atom_nullflag); /* llvm array values, used to represent the location of some element */ llvm::Value* Vals[2] = {int64_0, int32_0}; llvm::Value* Vals3[3] = {int64_0, int32_0, int32_0}; llvm::Value* Vals4[4] = {int64_0, int32_0, int32_0, int32_0}; llvm::Value** aggIdxList = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::Value** batch_vals = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::Value** batch_flag = 
(llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::Value** sdata = (llvm::Value**)palloc(sizeof(llvm::Value*) * numaggs); llvm::BasicBlock** agg_bb = (llvm::BasicBlock**)palloc(sizeof(llvm::BasicBlock*) * numaggs); llvm::BasicBlock** flag_then = (llvm::BasicBlock**)palloc(sizeof(llvm::BasicBlock*) * numaggs); llvm::BasicBlock** flag_else = (llvm::BasicBlock**)palloc(sizeof(llvm::BasicBlock*) * numaggs); GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "JittedSonicFastBatchAgg", voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("sonicHashAgg", sonicHashAggPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("batch", vectorBatchPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("aggIdx", int16PtrType)); jitted_sonicbatchagg = fn_prototype.generatePrototype(&builder, &llvmargs[0]); llvm::Value* sonicHashAgg = llvmargs[0]; llvm::Value* batch = llvmargs[1]; llvm::Value* aggIdx = llvmargs[2]; /* parameter used to mark if this tuple is NULL or not */ llvm::Value* isNull = builder.CreateAlloca(int8Type); /* SonicHashAgg.SonicHash.SonicHashMemoryControl.hashContext */ Vals4[2] = int32_pos_shash_sonichmemctl; Vals4[3] = int32_pos_sonichmemctl_hcxt; llvm::Value* hcxt = builder.CreateInBoundsGEP(sonicHashAgg, Vals4); hcxt = builder.CreateAlignedLoad(hcxt, 8, "hashContext"); /* get the nrows of the batch */ tmpval = builder.CreateInBoundsGEP(batch, Vals); nValues = builder.CreateAlignedLoad(tmpval, 4, "m_rows"); /* get vectorBatch.m_arr of the batch */ Vals[0] = int64_0; Vals[1] = int32_pos_batch_marr; llvm::Value* argVector = builder.CreateInBoundsGEP(batch, Vals); argVector = builder.CreateAlignedLoad(argVector, 8, "m_arr"); /* pre-load all the expression context : node->ss.ps.ps_ExprContext*/ ExprContext* exprcontext = node->ss.ps.ps_ExprContext; llvm::Value* econtext = llvmCodeGen->CastPtrToLlvmPtr(ExprContextPtrType, exprcontext); /* get sonic data array structure : SonicHashAgg.SonicHash.SonicDatumArray** */ Vals3[2] = 
int32_pos_shash_data; llvm::Value* sdataptr = builder.CreateInBoundsGEP(sonicHashAgg, Vals3); sdataptr = builder.CreateAlignedLoad(sdataptr, 8, "sonicdatumarray"); /* get sonic data array structure: SonicHashAgg.SonicHash.m_loc */ Vals4[2] = int32_pos_shash_loc; Vals4[3] = int64_0; llvm::Value* loc = builder.CreateInBoundsGEP(sonicHashAgg, Vals4); /* define the basic block needed in the main process */ llvm::BasicBlock* entry = &jitted_sonicbatchagg->getEntryBlock(); DEFINE_BLOCK(for_body, jitted_sonicbatchagg); DEFINE_BLOCK(for_inc, jitted_sonicbatchagg); DEFINE_BLOCK(for_end, jitted_sonicbatchagg); /* get all addIdx of agg operators */ for (i = 0; i < numaggs; i++) { llvm::Value* tmpidx = llvmCodeGen->getIntConstant(INT4OID, i); tmpval = builder.CreateInBoundsGEP(aggIdx, tmpidx); tmpval = builder.CreateAlignedLoad(tmpval, 4, "aggIdx"); aggIdxList[i] = builder.CreateSExt(tmpval, int64Type); tmpval = builder.CreateInBoundsGEP(sdataptr, aggIdxList[i]); sdata[i] = builder.CreateAlignedLoad(tmpval, 8, "sonicdata"); agg_bb[i] = llvm::BasicBlock::Create(context, "agg_bb", jitted_sonicbatchagg); flag_then[i] = llvm::BasicBlock::Create(context, "flag_then", jitted_sonicbatchagg); flag_else[i] = llvm::BasicBlock::Create(context, "flag_else", jitted_sonicbatchagg); } /* * Start the main process for SonicHashAgg::batchaggregation, which has the following * pedudo code: * for (j = 0; j < nrows; j++){ * for (i = 0; i < m_aggNum; i++){ * peraggstate = peragg[numaggs - 1 - i]; * pbatch = ExecVecProject (peraggstate->evalproj) * AggregationOnScalar(aggInfo[i], &pbatch->m_arr[0], aggidx[i], m_Loc) * } * } */ builder.SetInsertPoint(entry); /* * First get the ecxt_per_tuple_memory, since we need to switch to this * memory context. 
*/ /* SonicHashAgg.m_econtext */ Vals[1] = int32_pos_shashagg_ecxt; llvm::Value* mecontext = builder.CreateInBoundsGEP(sonicHashAgg, Vals); mecontext = builder.CreateAlignedLoad(mecontext, 8, "m_econtext"); Vals[1] = int32_pos_ecxt_pertuple; llvm::Value* agg_expr_context = builder.CreateInBoundsGEP(mecontext, Vals); agg_expr_context = builder.CreateAlignedLoad(agg_expr_context, 8, "agg_per_tuple_memory"); llvm::Value* agg_oldcontext = VecExprCodeGen::MemCxtSwitToCodeGen(&builder, agg_expr_context); /* * Load value and flag from batch before the batch loop when we have * simple vars in transition level. */ for (i = 0; i < numaggs; i++) { int numSimpleVars = 0; AggStatePerAgg peraggstate = &node->peragg[numaggs - i - 1]; aggref = (Aggref*)(peraggstate->aggref); ProjectionInfo* projInfo = (ProjectionInfo*)(peraggstate->evalproj); if (aggref->aggstage == 0 && aggref->aggfnoid != COUNTOID) { numSimpleVars = projInfo->pi_numSimpleVars; if (numSimpleVars > 0) { int* varNumbers = projInfo->pi_varNumbers; int varNumber = varNumbers[0] - 1; /* m_arr[varNumber].m_vals */ Vals[0] = llvmCodeGen->getIntConstant(INT8OID, varNumber); Vals[1] = int32_pos_scalvec_vals; tmpval = builder.CreateInBoundsGEP(argVector, Vals); tmpval = builder.CreateAlignedLoad(tmpval, 8, "m_vals"); batch_vals[i] = tmpval; /* m_arr[varNumber].m_flag */ Vals[1] = int32_pos_scalvec_flag; llvm::Value* argFlag = builder.CreateInBoundsGEP(argVector, Vals); argFlag = builder.CreateAlignedLoad(argFlag, 1, "m_flag"); batch_flag[i] = argFlag; } else { batch_vals[i] = NULL; batch_flag[i] = NULL; } } } tmpval = builder.CreateICmpSGT(nValues, int32_0); builder.CreateCondBr(tmpval, for_body, for_end); builder.SetInsertPoint(for_body); llvm::PHINode* phi_idx = builder.CreatePHI(int64Type, 2); /* after each loop, index plus one */ idx_next = builder.CreateAdd(phi_idx, int64_1); phi_idx->addIncoming(int64_0, entry); phi_idx->addIncoming(idx_next, for_inc); /* get location : loc[i] (see vsnumeric_sum and vsint8_sum) and 
check if it is zero */ tmpval = builder.CreateInBoundsGEP(loc, phi_idx); locidx = builder.CreateAlignedLoad(tmpval, 4, "loc_i"); tmpval = builder.CreateICmpEQ(locidx, int32_0); builder.CreateCondBr(tmpval, for_inc, agg_bb[0]); /* loop over the numaggs */ int numSimpleVars = 0; for (i = 0; i < numaggs; i++) { llvm::BasicBlock* bisum_bb = NULL; llvm::BasicBlock* numsum_bb = NULL; /* the inverse order */ AggStatePerAgg peraggstate = &node->peragg[numaggs - i - 1]; aggref = (Aggref*)(peraggstate->aggref); ProjectionInfo* projInfo = (ProjectionInfo*)(peraggstate->evalproj); /* start the codegeneration for each aggregation */ builder.SetInsertPoint(agg_bb[i]); if (aggref->aggfnoid == NUMERICAVGFUNCOID) { /* get scount = sdata[aggidx + 1] */ llvm::Value* aggplus = builder.CreateAdd(aggIdxList[i], int64_1); tmpval = builder.CreateInBoundsGEP(sdataptr, aggplus); scount = builder.CreateAlignedLoad(tmpval, 8, "scount"); } if (aggref->aggstage == 0) { if (aggref->aggfnoid != COUNTOID) { Assert(peraggstate->evalproj != NULL); AggrefExprState* aggexprstate = peraggstate->aggrefstate; /* check if current expression can be codegened in fast path or not */ estate = (ExprState*)linitial(aggexprstate->args); fast_aggref = AggRefFastJittable(estate); /* * Do not consider collection and finalization level for * numeric_avg to avoid deconstruct_array. 
*/ if (aggref->aggfnoid == NUMERICAVGFUNCOID && aggref->aggstage > 0) fast_aggref = false; /* If the current agg expression is just a simple var, * load it from the batch directly */ if (projInfo == NULL) { ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), errmodule(MOD_LLVM), errmsg("Unexpected NULL project information."))); } numSimpleVars = projInfo->pi_numSimpleVars; if (numSimpleVars > 0) { /* m_arr[varNumber].m_vals */ tmpval = batch_vals[i]; tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); result = builder.CreateAlignedLoad(tmpval, 8, "val"); /* m_arr[varNumber].m_flag */ tmpval = batch_flag[i]; tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); tmpval = builder.CreateAlignedLoad(tmpval, 1, "flag"); builder.CreateAlignedStore(tmpval, isNull, 1); } else { /* set the batch information : econtext->ecxt_outerbatch = batch */ Vals[0] = int64_0; Vals[1] = int32_pos_ecxt_outerbatch; llvm::Value* tmp_outerbatch = builder.CreateInBoundsGEP(econtext, Vals); builder.CreateAlignedStore(batch, tmp_outerbatch, 8); /* * If fast_aggref is true, we could try to evaluate the * expression value by using BI64 all the way, and turn * to original path once meet outofbound. 
*/ if (fast_aggref) { llvm::BasicBlock* bb_last = builder.GetInsertBlock(); DEFINE_BLOCK(bb_null, jitted_sonicbatchagg); DEFINE_BLOCK(bb_outofbound, jitted_sonicbatchagg); if (NULL == bisum_bb) { bisum_bb = llvm::BasicBlock::Create(context, "bisum_bb", jitted_sonicbatchagg); } if (NULL == numsum_bb) { numsum_bb = llvm::BasicBlock::Create(context, "numsum_bb", jitted_sonicbatchagg); } /* evaluate expression result */ llvm::Value* tmpexpres = EvalFastExprInBatchAgg(estate, builder, jitted_sonicbatchagg, &bb_null, &bb_last, &bb_outofbound, econtext, argVector, phi_idx); /* expres is already in {int16, int64} format */ builder.SetInsertPoint(bb_last); builder.CreateAlignedStore(int8_0, isNull, 1); llvm::Value* tmp_scale = builder.CreateExtractValue(tmpexpres, 0); llvm::Value* tmp_value = builder.CreateExtractValue(tmpexpres, 1); expres = llvm::UndefValue::get(SiNumeric64Type); expres = builder.CreateInsertValue(expres, tmp_scale, 0); expres = builder.CreateInsertValue(expres, tmp_value, 1); builder.CreateBr(bisum_bb); /* construct a null value, and no need to do aggregation */ builder.SetInsertPoint(bb_null); builder.CreateAlignedStore(int8_1, isNull, 1); if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* if result can not be represented in BI64, turn to * the original path */ builder.SetInsertPoint(bb_outofbound); /* Turn to per_tuple_memory to evaluate expression. 
*/ Vals[0] = int64_0; Vals[1] = int32_pos_ecxt_pertuple; llvm::Value* curr_Context = builder.CreateInBoundsGEP(econtext, Vals); curr_Context = builder.CreateAlignedLoad(curr_Context, 8, "per_tuple_memory"); llvm::Value* cg_oldContext = VecExprCodeGen::MemCxtSwitToCodeGen(&builder, curr_Context); result = EvalSimpleExprInBatchAgg(estate, builder, econtext, phi_idx, isNull); /* return back to the old memory context */ (void)VecExprCodeGen::MemCxtSwitToCodeGen(&builder, cg_oldContext); } else { /* * corresponding to ExecVecProject(peraggstate->evalproj) : * to evaluate expressions, we should turn to per_tuple_memory. */ Vals[0] = int64_0; Vals[1] = int32_pos_ecxt_pertuple; llvm::Value* curr_Context = builder.CreateInBoundsGEP(econtext, Vals); curr_Context = builder.CreateAlignedLoad(curr_Context, 8, "per_tuple_memory"); llvm::Value* cg_oldContext = VecExprCodeGen::MemCxtSwitToCodeGen(&builder, curr_Context); /* corresponding to ExecVecProject(peraggstate->evalproj) */ result = EvalSimpleExprInBatchAgg(estate, builder, econtext, phi_idx, isNull); /* return back to the old memory context */ (void)VecExprCodeGen::MemCxtSwitToCodeGen(&builder, cg_oldContext); } } } else { /* * When current stage is transaction and aggfnoid is COUNTOID, no need to * load any batch information. since we only need to plus one when cell * is not null. */ result = int64_0; builder.CreateAlignedStore(int8_0, isNull, 1); } } else { /* * When aggref->stage is not transiction, the aggref expr is always * be var, so get the value from batch directly (projInfo is not null). 
*/ if (projInfo != NULL) { int* varNumbers = projInfo->pi_varNumbers; int varNumber = varNumbers[0] - 1; /* m_arr[varNumber].m_vals */ Vals[0] = llvmCodeGen->getIntConstant(INT8OID, varNumber); Vals[1] = int32_pos_scalvec_vals; tmpval = builder.CreateInBoundsGEP(argVector, Vals); tmpval = builder.CreateAlignedLoad(tmpval, 8, "m_vals"); tmpval = builder.CreateInBoundsGEP(tmpval, phi_idx); result = builder.CreateAlignedLoad(tmpval, 8, "val"); /* m_arr[varNumber].m_flag */ Vals[1] = int32_pos_scalvec_flag; llvm::Value* argFlag = builder.CreateInBoundsGEP(argVector, Vals); argFlag = builder.CreateAlignedLoad(argFlag, 1, "m_flag"); tmpval = builder.CreateInBoundsGEP(argFlag, phi_idx); tmpval = builder.CreateAlignedLoad(tmpval, 1, "flag"); builder.CreateAlignedStore(tmpval, isNull, 1); } else { ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), errmodule(MOD_LLVM), errmsg("Unexpected NULL project information."))); } } /* Compute Aggregation */ if (aggref->aggfnoid == COUNTOID) { flag_then[i]->eraseFromParent(); flag_else[i]->eraseFromParent(); char* Jittedname = NULL; if (aggref->aggstage == 0) Jittedname = "Jitted_scount_0"; else Jittedname = "Jitted_scount_1"; llvm::Function* func_vscount = llvmCodeGen->module()->getFunction(Jittedname); if (NULL == func_vscount) { func_vscount = vsonic_count_codegen(aggref); } builder.CreateCall(func_vscount, {sdata[i], locidx, result}); if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); } else { /* * now we already get sonic datum (data and locidx) and pVector(result), check * the flag and do aggregation. 
*/ /* see if IS_NULL(flag[phi_idx]) == false */ llvm::Value* tmpnull = builder.CreateAlignedLoad(isNull, 1, "tmpnull"); tmpnull = builder.CreateAnd(tmpnull, int8_1); llvm::Value* flag_cmp = builder.CreateICmpEQ(tmpnull, int8_0); builder.CreateCondBr(flag_cmp, flag_then[i], flag_else[i]); /* only do when not null */ builder.SetInsertPoint(flag_then[i]); switch (aggref->aggfnoid) { case NUMERICSUMFUNCOID: { /* * If aggref can be evaluated in fast path and just be * simple vars, use the result from batch. */ if (fast_aggref && (numSimpleVars > 0)) { DEFINE_BLOCK(agg_then, jitted_sonicbatchagg); DEFINE_BLOCK(agg_else, jitted_sonicbatchagg); DEFINE_BLOCK(agg_end, jitted_sonicbatchagg); DEFINE_BLOCK(normal_bb, jitted_sonicbatchagg); DEFINE_BLOCK(var_bisum_bb, jitted_sonicbatchagg); DEFINE_BLOCK(var_numsum_bb, jitted_sonicbatchagg); /* get sonic data according to sonic datum array and locidx */ /* calculate arrIdx and atomIdx */ Vals[0] = int64_0; Vals[1] = int32_pos_sdarray_nbit; nbit = builder.CreateInBoundsGEP(sdata[i], Vals); nbit = builder.CreateAlignedLoad(nbit, 4, "arridx"); arrIdx = builder.CreateLShr(locidx, nbit); arrIdx = builder.CreateSExt(arrIdx, int64Type); Vals[1] = int32_pos_sdarray_atomsize; atomsize = builder.CreateInBoundsGEP(sdata[i], Vals); atomsize = builder.CreateAlignedLoad(atomsize, 4, "atomsize"); atomsize = builder.CreateSub(atomsize, int32_1); atomIdx = builder.CreateAnd(atomsize, locidx); atomIdx = builder.CreateSExt(atomIdx, int64Type); /* get sdata[i]->m_arr[arrIdx] */ Vals[1] = int32_pos_sdarray_arr; atom = builder.CreateInBoundsGEP(sdata[i], Vals); atom = builder.CreateAlignedLoad(atom, 8, "atomptr"); atom = builder.CreateInBoundsGEP(atom, arrIdx); atom = builder.CreateAlignedLoad(atom, 8, "atom"); /* get address of sdata->m_arr[arrIdx]->data[atomIdx] */ Vals[1] = int32_pos_atom_data; data = builder.CreateInBoundsGEP(atom, Vals); data = builder.CreateAlignedLoad(data, 8, "data"); data = builder.CreateBitCast(data, int64PtrType); 
llvm::Value* dataptr = builder.CreateInBoundsGEP(data, atomIdx); /* get sdata->m_arr[arrIdx]->nullFlag[atomIdx] and check if it is NULL */ Vals[1] = int32_pos_atom_nullflag; llvm::Value* nullflag = builder.CreateInBoundsGEP(atom, Vals); nullflag = builder.CreateAlignedLoad(nullflag, 8, "nullflagptr"); nullflag = builder.CreateInBoundsGEP(nullflag, atomIdx); /* load nullflag */ tmpval = builder.CreateAlignedLoad(nullflag, 1, "nullFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* sonic data is null, set value */ builder.SetInsertPoint(agg_then); /* * should make a new context to record the result : the * following code corresponding to: * 'leftarg = DatumGetBINumeric(pVal[i]); * data->setValue(NumericGetDatum(leftarg, false, arrIndx, atomIdx))'. */ tmpval = DatumGetBINumericCodeGen(&builder, result); tmpval = builder.CreatePtrToInt(tmpval, int64Type); tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, dataptr, 8); builder.CreateAlignedStore(int8_0, nullflag, 1); /* turn to next basicblock or end this */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* cell be not null, do aggregation */ builder.SetInsertPoint(agg_else); /* * When fast_aggref is true and numSimpleVars is greater than zero, * the expr is numeric type var. Convert this numeric type data to * SiNumeric data to get the value. 
*/ llvm::Value* bires = DatumGetBINumericCodeGen(&builder, result); /* extract the header of result to check if it is BINumeric */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "biheader"); llvm::Value* rflag = builder.CreateAnd(tmpval, val_mask); /* get leftdata[0] and convert to BINumeric */ llvm::Value* realdata = builder.CreateAlignedLoad(dataptr, 8, "dataval"); llvm::Value* leftarg = DatumGetBINumericCodeGen(&builder, realdata); tmpval = builder.CreateInBoundsGEP(leftarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "sonicheader"); llvm::Value* lflag = builder.CreateAnd(tmpval, val_mask); /* check if either of them is not BI64 */ llvm::Value* oparg1 = builder.CreateICmpEQ(lflag, val_binum64); llvm::Value* oparg2 = builder.CreateICmpEQ(rflag, val_binum64); llvm::Value* bothbi64 = builder.CreateAnd(oparg1, oparg2); /* use fast path only when both args are bi64 */ builder.CreateCondBr(bothbi64, var_bisum_bb, var_numsum_bb); builder.SetInsertPoint(var_bisum_bb); /* extract the actual data of numeric only when sonic data is not null */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_1; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateBitCast(tmpval, int64PtrType); llvm::Value* resval = builder.CreateAlignedLoad(tmpval, 8, "bivalue"); /* extrac the actual data of leftdata and do sum */ llvm::Value* sonicptr = builder.CreateAdd(realdata, int64_6); sonicptr = builder.CreateIntToPtr(sonicptr, int64PtrType); llvm::Value* midval = builder.CreateAlignedLoad(sonicptr, 8, "intvalue"); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sonicadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sonicadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), 
errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sonicadd_overflow, {resval, midval}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, var_numsum_bb, normal_bb); /* if meet overflow during aggregation, turn to original sum function. */ builder.SetInsertPoint(var_numsum_bb); llvm::Function* func_vsnumericsum = llvmCodeGen->module()->getFunction("Jitted_sonic_numericsum"); if (NULL == func_vsnumericsum) { func_vsnumericsum = vsnumeric_sum_codegen(aggref); } sdata[i] = builder.CreateBitCast(sdata[i], sonicEncodingDatumArrayPtrType); builder.CreateCall(func_vsnumericsum, {sdata[i], locidx, result}); builder.CreateBr(agg_end); builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, sonicptr, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else if (fast_aggref) { /* * If aggref can be evaluated in fast path and be numeric * expressions, use the result from fastexpr. */ Assert(bisum_bb != NULL); Assert(numsum_bb != NULL); DEFINE_BLOCK(agg_end, jitted_sonicbatchagg); DEFINE_BLOCK(agg_then, jitted_sonicbatchagg); DEFINE_BLOCK(agg_else, jitted_sonicbatchagg); DEFINE_BLOCK(normal_bb, jitted_sonicbatchagg); DEFINE_BLOCK(expr_bisum_bb, jitted_sonicbatchagg); DEFINE_BLOCK(bioverflow_bb, jitted_sonicbatchagg); /* * if the result of expression is outofbound, turn to * original numeric path. */ builder.CreateBr(numsum_bb); /* * if the result of expression is BI64, extract it. 
*/ builder.SetInsertPoint(bisum_bb); llvm::Value* resval = builder.CreateExtractValue(expres, 1); /* get sonic data according to sonic datum array and locidx */ /* calculate arrIdx and atomIdx */ Vals[1] = int32_pos_sdarray_nbit; nbit = builder.CreateInBoundsGEP(sdata[i], Vals); nbit = builder.CreateAlignedLoad(nbit, 4, "arridx"); arrIdx = builder.CreateLShr(locidx, nbit); arrIdx = builder.CreateSExt(arrIdx, int64Type); Vals[1] = int32_pos_sdarray_atomsize; atomsize = builder.CreateInBoundsGEP(sdata[i], Vals); atomsize = builder.CreateAlignedLoad(atomsize, 4, "atomsize"); atomsize = builder.CreateSub(atomsize, int32_1); atomIdx = builder.CreateAnd(atomsize, locidx); atomIdx = builder.CreateSExt(atomIdx, int64Type); /* get sdata[aggidx]->m_arr[arrIdx] */ Vals[1] = int32_pos_sdarray_arr; atom = builder.CreateInBoundsGEP(sdata[i], Vals); atom = builder.CreateAlignedLoad(atom, 8, "atomptr"); atom = builder.CreateInBoundsGEP(atom, arrIdx); atom = builder.CreateAlignedLoad(atom, 8, "atom"); /* get address of sdata[aggidx]->m_arr[arrIdx]->data[atomIdx] */ Vals[1] = int32_pos_atom_data; data = builder.CreateInBoundsGEP(atom, Vals); data = builder.CreateAlignedLoad(data, 8, "data"); data = builder.CreateBitCast(data, int64PtrType); llvm::Value* dataptr = builder.CreateInBoundsGEP(data, atomIdx); /* get sdata[aggidx]->m_arr[arrIdx]->nullFlag[atomIdx] and check if it is NULL */ Vals[1] = int32_pos_atom_nullflag; llvm::Value* nullflag = builder.CreateInBoundsGEP(atom, Vals); nullflag = builder.CreateAlignedLoad(nullflag, 8, "nullflagptr"); nullflag = builder.CreateInBoundsGEP(nullflag, atomIdx); /* load nullflag */ tmpval = builder.CreateAlignedLoad(nullflag, 1, "nullFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* sonic data is null, set value */ builder.SetInsertPoint(agg_then); /* get the aligned scale of this expression */ exprscale = GetAlignedScale(estate->expr); 
llvm::Value* alignedscale = llvmCodeGen->getIntConstant(CHAROID, exprscale); /* * should make a new context to record the result : the * following code corresponding to: * 'leftarg = DatumGetBINumeric(pVal[i]); * cell->m_val[idx].val = addVariable(context, NumericGetDatum(leftarg));'. */ tmpval = WrapmakeNumeric64CodeGen(&builder, resval, alignedscale); tmpval = DatumGetBINumericCodeGen(&builder, tmpval); tmpval = builder.CreatePtrToInt(tmpval, int64Type); tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, dataptr, 8); builder.CreateAlignedStore(int8_0, nullflag, 1); /* turn to next basicblock or end this */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* cell not be null, do aggregation */ builder.SetInsertPoint(agg_else); llvm::Value* realdata = builder.CreateAlignedLoad(dataptr, 8, "dataval"); /* first make sure the value in sonic datum array is BI64 format */ llvm::Value* leftarg = DatumGetBINumericCodeGen(&builder, realdata); Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(leftarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "leftheader"); llvm::Value* biflag = builder.CreateAnd(tmpval, val_mask); llvm::Value* isbi64 = builder.CreateICmpEQ(biflag, val_binum64); builder.CreateCondBr(isbi64, expr_bisum_bb, bioverflow_bb); /* * do aggregation directly only when both expr value * and cell value is bi64. 
*/ builder.SetInsertPoint(expr_bisum_bb); llvm::Value* sonicptr = builder.CreateAdd(realdata, int64_6); sonicptr = builder.CreateIntToPtr(sonicptr, int64PtrType); llvm::Value* midval = builder.CreateAlignedLoad(sonicptr, 8, "intvalue"); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sonicadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sonicadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!"))); } llvm::Value* aggres = builder.CreateCall(func_sonicadd_overflow, {resval, midval}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, bioverflow_bb, normal_bb); builder.SetInsertPoint(bioverflow_bb); exprscale = GetAlignedScale(estate->expr); llvm::Value* ascale = llvmCodeGen->getIntConstant(CHAROID, exprscale); llvm::Value* bioverres = WrapmakeNumeric64CodeGen(&builder, resval, ascale); builder.CreateBr(numsum_bb); builder.SetInsertPoint(numsum_bb); llvm::PHINode* numres = builder.CreatePHI(int64Type, 2); numres->addIncoming(result, flag_then[i]); numres->addIncoming(bioverres, bioverflow_bb); llvm::Value* evalval = (llvm::Value*)numres; llvm::Function* func_vsnumericsum = llvmCodeGen->module()->getFunction("Jitted_sonic_numericsum"); if (NULL == func_vsnumericsum) { func_vsnumericsum = vsnumeric_sum_codegen(aggref); } sdata[i] = builder.CreateBitCast(sdata[i], sonicEncodingDatumArrayPtrType); builder.CreateCall(func_vsnumericsum, {sdata[i], locidx, evalval}); builder.CreateBr(agg_end); /* if there is no overflow, extract result directly */ builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, sonicptr, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else { llvm::Function* func_vsnumericsum = 
llvmCodeGen->module()->getFunction("Jitted_sonic_numericsum"); if (NULL == func_vsnumericsum) { func_vsnumericsum = vsnumeric_sum_codegen(aggref); } sdata[i] = builder.CreateBitCast(sdata[i], sonicEncodingDatumArrayPtrType); builder.CreateCall(func_vsnumericsum, {sdata[i], locidx, result}); } } break; case NUMERICAVGFUNCOID: { /* * If aggref can be evaluated in fast path and just be * simple vars, use the result from batch. */ if (fast_aggref && (numSimpleVars > 0)) { DEFINE_BLOCK(agg_then, jitted_sonicbatchagg); DEFINE_BLOCK(agg_else, jitted_sonicbatchagg); DEFINE_BLOCK(agg_end, jitted_sonicbatchagg); DEFINE_BLOCK(normal_bb, jitted_sonicbatchagg); DEFINE_BLOCK(bisum_bblock, jitted_sonicbatchagg); DEFINE_BLOCK(numsum_bblock, jitted_sonicbatchagg); /* get sonic data according to sonic datum array and locidx */ /* calculate arrIdx and atomIdx */ Vals[0] = int64_0; Vals[1] = int32_pos_sdarray_nbit; nbit = builder.CreateInBoundsGEP(sdata[i], Vals); nbit = builder.CreateAlignedLoad(nbit, 4, "arridx"); arrIdx = builder.CreateLShr(locidx, nbit); arrIdx = builder.CreateSExt(arrIdx, int64Type); Vals[1] = int32_pos_sdarray_atomsize; atomsize = builder.CreateInBoundsGEP(sdata[i], Vals); atomsize = builder.CreateAlignedLoad(atomsize, 4, "atomsize"); atomsize = builder.CreateSub(atomsize, int32_1); atomIdx = builder.CreateAnd(atomsize, locidx); atomIdx = builder.CreateSExt(atomIdx, int64Type); /* get sdata[aggidx]->m_arr[arrIdx] */ Vals[1] = int32_pos_sdarray_arr; atom = builder.CreateInBoundsGEP(sdata[i], Vals); atom = builder.CreateAlignedLoad(atom, 8, "atomptr"); atom = builder.CreateInBoundsGEP(atom, arrIdx); atom = builder.CreateAlignedLoad(atom, 8, "atom"); /* get address of sdata->m_arr[arrIdx]->data[atomIdx] */ Vals[1] = int32_pos_atom_data; data = builder.CreateInBoundsGEP(atom, Vals); data = builder.CreateAlignedLoad(data, 8, "data"); data = builder.CreateBitCast(data, int64PtrType); llvm::Value* dataptr = builder.CreateInBoundsGEP(data, atomIdx); /* get 
scount->m_arr[arrIdx] */ Vals[1] = int32_pos_sdarray_arr; cntatom = builder.CreateInBoundsGEP(scount, Vals); cntatom = builder.CreateAlignedLoad(cntatom, 8, "cntatomptr"); cntatom = builder.CreateInBoundsGEP(cntatom, arrIdx); cntatom = builder.CreateAlignedLoad(cntatom, 8, "cntatom"); /* get address of scount->m_arr[arrIdx]->data[atomIdx] */ Vals[1] = int32_pos_atom_data; cntdata = builder.CreateInBoundsGEP(cntatom, Vals); cntdata = builder.CreateAlignedLoad(cntdata, 8, "cntdata"); cntdata = builder.CreateBitCast(cntdata, int64PtrType); llvm::Value* cntptr = builder.CreateInBoundsGEP(cntdata, atomIdx); /* get sdata[aggidx]->m_arr[arrIdx]->nullFlag[atomIdx] */ Vals[1] = int32_pos_atom_nullflag; llvm::Value* dataflag = builder.CreateInBoundsGEP(atom, Vals); dataflag = builder.CreateAlignedLoad(dataflag, 8, "dataflagptr"); dataflag = builder.CreateInBoundsGEP(dataflag, atomIdx); /* get scount>m_arr[arrIdx]->nullFlag[atomIdx] */ Vals[1] = int32_pos_atom_nullflag; llvm::Value* cntflag = builder.CreateInBoundsGEP(cntatom, Vals); cntflag = builder.CreateAlignedLoad(cntflag, 8, "cntflagptr"); cntflag = builder.CreateInBoundsGEP(cntflag, atomIdx); /* Now load the sonic data flag and check it */ tmpval = builder.CreateAlignedLoad(dataflag, 1, "dataFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* sonic data is null, store the value in data and count */ builder.SetInsertPoint(agg_then); /* do leftarg = DatumGetBINumeric(pVal[i]) */ tmpval = DatumGetBINumericCodeGen(&builder, result); tmpval = builder.CreatePtrToInt(tmpval, int64Type); /* corresponding to addVariable(context, NumericGetDatum(leftarg)) */ tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, dataptr, 8); /* count set to be one */ builder.CreateAlignedStore(int64_1, cntptr, 8); /* set cell flag */ builder.CreateAlignedStore(int8_0, dataflag, 1); 
builder.CreateAlignedStore(int8_0, cntflag, 1); /* turn to next basicblock or end this */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* cell be not null, do aggregation */ builder.SetInsertPoint(agg_else); /* * When fast_aggref is true and numSimpleVars is greater than zero, * the expr is numeric type var. Convert this numeric type data to * SiNumeric data to get the value. */ llvm::Value* bires = DatumGetBINumericCodeGen(&builder, result); /* extract the header of result to check if it is BINumeric */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "biheader"); llvm::Value* rflag = builder.CreateAnd(tmpval, val_mask); /* get leftdata[0] and convert to Numeric to get lflag */ llvm::Value* realdata = builder.CreateAlignedLoad(dataptr, 8, "dataval"); llvm::Value* leftarg = builder.CreateIntToPtr(realdata, numericPtrType); tmpval = builder.CreateInBoundsGEP(leftarg, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "sonicheader"); llvm::Value* lflag = builder.CreateAnd(tmpval, val_mask); /* check if either of them is not BI64 */ llvm::Value* oparg1 = builder.CreateICmpEQ(lflag, val_binum64); llvm::Value* oparg2 = builder.CreateICmpEQ(rflag, val_binum64); llvm::Value* bothbi64 = builder.CreateAnd(oparg1, oparg2); /* use fast path only when both args are bi64 */ builder.CreateCondBr(bothbi64, bisum_bblock, numsum_bblock); builder.SetInsertPoint(bisum_bblock); /* extract the actual data of numeric only when value is not null */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_1; tmpval = builder.CreateInBoundsGEP(bires, Vals4); tmpval = builder.CreateBitCast(tmpval, int64PtrType); llvm::Value* resval = builder.CreateAlignedLoad(tmpval, 8, "value"); llvm::Value* sonicptr = builder.CreateAdd(realdata, int64_6); sonicptr = builder.CreateIntToPtr(sonicptr, int64PtrType); 
llvm::Value* midval = builder.CreateAlignedLoad(sonicptr, 8); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sonicadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sonicadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sonicadd_overflow, {resval, midval}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, numsum_bblock, normal_bb); builder.SetInsertPoint(numsum_bblock); llvm::Function* func_vsnumericavg = llvmCodeGen->module()->getFunction("Jitted_sonic_numericavg"); if (NULL == func_vsnumericavg) { func_vsnumericavg = vsnumeric_avg_codegen(aggref); } sdata[i] = builder.CreateBitCast(sdata[i], sonicEncodingDatumArrayPtrType); builder.CreateCall(func_vsnumericavg, {sdata[i], scount, locidx, result}); builder.CreateBr(agg_end); builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, sonicptr, 8); /* cell->m_val[idx+1].val++ */ tmpval = builder.CreateAlignedLoad(cntptr, 8, "count"); tmpval = builder.CreateAdd(tmpval, int64_1); builder.CreateAlignedStore(tmpval, cntptr, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else if (fast_aggref) { /* * If aggref can be evaluated in fast path and be numeric * expressions, use the result from fastexpr. 
*/ Assert(bisum_bb != NULL); Assert(numsum_bb != NULL); DEFINE_BLOCK(agg_end, jitted_sonicbatchagg); DEFINE_BLOCK(agg_then, jitted_sonicbatchagg); DEFINE_BLOCK(agg_else, jitted_sonicbatchagg); DEFINE_BLOCK(normal_bb, jitted_sonicbatchagg); DEFINE_BLOCK(expr_bisum_bb, jitted_sonicbatchagg); DEFINE_BLOCK(bioverflow_bb, jitted_sonicbatchagg); builder.CreateBr(numsum_bb); builder.SetInsertPoint(bisum_bb); llvm::Value* resval = builder.CreateExtractValue(expres, 1); /* get sonic data according to sonic datum array and locidx */ /* calculate arrIdx and atomIdx */ Vals[0] = int64_0; Vals[1] = int32_pos_sdarray_nbit; nbit = builder.CreateInBoundsGEP(sdata[i], Vals); nbit = builder.CreateAlignedLoad(nbit, 4, "arridx"); arrIdx = builder.CreateLShr(locidx, nbit); arrIdx = builder.CreateSExt(arrIdx, int64Type); Vals[1] = int32_pos_sdarray_atomsize; atomsize = builder.CreateInBoundsGEP(sdata[i], Vals); atomsize = builder.CreateAlignedLoad(atomsize, 4, "atomsize"); atomsize = builder.CreateSub(atomsize, int32_1); atomIdx = builder.CreateAnd(atomsize, locidx); atomIdx = builder.CreateSExt(atomIdx, int64Type); /* get sdata[aggidx]->m_arr[arrIdx] */ Vals[1] = int32_pos_sdarray_arr; atom = builder.CreateInBoundsGEP(sdata[i], Vals); atom = builder.CreateAlignedLoad(atom, 8, "atomptr"); atom = builder.CreateInBoundsGEP(atom, arrIdx); atom = builder.CreateAlignedLoad(atom, 8, "atom"); /* get address of sdata->m_arr[arrIdx]->data[atomIdx] */ Vals[1] = int32_pos_atom_data; data = builder.CreateInBoundsGEP(atom, Vals); data = builder.CreateAlignedLoad(data, 8, "data"); data = builder.CreateBitCast(data, int64PtrType); llvm::Value* dataptr = builder.CreateInBoundsGEP(data, atomIdx); /* get scount->m_arr[arrIdx] */ Vals[1] = int32_pos_sdarray_arr; cntatom = builder.CreateInBoundsGEP(scount, Vals); cntatom = builder.CreateAlignedLoad(cntatom, 8, "cntatomptr"); cntatom = builder.CreateInBoundsGEP(cntatom, arrIdx); cntatom = builder.CreateAlignedLoad(cntatom, 8, "cntatom"); /* get address of 
scount->m_arr[arrIdx]->data[atomIdx] */ Vals[1] = int32_pos_atom_data; cntdata = builder.CreateInBoundsGEP(cntatom, Vals); cntdata = builder.CreateAlignedLoad(cntdata, 8, "cntdata"); cntdata = builder.CreateBitCast(cntdata, int64PtrType); llvm::Value* cntptr = builder.CreateInBoundsGEP(cntdata, atomIdx); /* get sdata[aggidx]->m_arr[arrIdx]->nullFlag[atomIdx] */ Vals[1] = int32_pos_atom_nullflag; llvm::Value* dataflag = builder.CreateInBoundsGEP(atom, Vals); dataflag = builder.CreateAlignedLoad(dataflag, 8, "dataflagptr"); dataflag = builder.CreateInBoundsGEP(dataflag, atomIdx); /* get scount>m_arr[arrIdx]->nullFlag[atomIdx] */ Vals[1] = int32_pos_atom_nullflag; llvm::Value* cntflag = builder.CreateInBoundsGEP(cntatom, Vals); cntflag = builder.CreateAlignedLoad(cntflag, 8, "cntflagptr"); cntflag = builder.CreateInBoundsGEP(cntflag, atomIdx); /* Now load the sonic data flag and check it */ tmpval = builder.CreateAlignedLoad(dataflag, 1, "dataFlag"); tmpval = builder.CreateAnd(tmpval, int8_1); tmpval = builder.CreateICmpEQ(tmpval, int8_0); builder.CreateCondBr(tmpval, agg_else, agg_then); /* cell be null, add Variable */ builder.SetInsertPoint(agg_then); exprscale = GetAlignedScale(estate->expr); llvm::Value* alignedscale = llvmCodeGen->getIntConstant(CHAROID, exprscale); /* * should make a new context to record the result : the * following code corresponding to: * 'leftarg = DatumGetBINumeric(pVal[i]); * data->setValue(NumericGetDatum(leftarg), false, arrIdx, atomIdx);'. 
*/ tmpval = WrapmakeNumeric64CodeGen(&builder, resval, alignedscale); tmpval = DatumGetBINumericCodeGen(&builder, tmpval); tmpval = builder.CreatePtrToInt(tmpval, int64Type); tmpval = WrapaddVariableCodeGen(&builder, hcxt, tmpval); builder.CreateAlignedStore(tmpval, dataptr, 8); /* count set to be one */ builder.CreateAlignedStore(int64_1, cntptr, 8); /* set the flag of hashcell */ builder.CreateAlignedStore(int8_0, dataflag, 1); builder.CreateAlignedStore(int8_0, cntflag, 1); /* turn to next basicblock */ if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); builder.SetInsertPoint(agg_else); llvm::Value* realdata = builder.CreateAlignedLoad(dataptr, 8, "cell_val"); /* first make sure the value in cell is BI64 format */ Vals4[0] = int64_0; Vals4[1] = int32_1; Vals4[2] = int32_0; Vals4[3] = int32_0; llvm::Value* leftflag = builder.CreateIntToPtr(realdata, numericPtrType); tmpval = builder.CreateInBoundsGEP(leftflag, Vals4); tmpval = builder.CreateAlignedLoad(tmpval, 2, "leftheader"); llvm::Value* biflag = builder.CreateAnd(tmpval, val_mask); llvm::Value* isbi64 = builder.CreateICmpEQ(biflag, val_binum64); builder.CreateCondBr(isbi64, expr_bisum_bb, bioverflow_bb); /* * do aggregation directly only when both expr value * and cell value is bi64. 
*/ builder.SetInsertPoint(expr_bisum_bb); llvm::Value* sonicptr = builder.CreateAdd(realdata, int64_6); sonicptr = builder.CreateIntToPtr(sonicptr, int64PtrType); llvm::Value* midval = builder.CreateAlignedLoad(sonicptr, 8); /* check overflow */ llvm::Type* Intrinsic_Tys[] = {int64Type}; llvm::Function* func_sonicadd_overflow = llvm::Intrinsic::getDeclaration(mod, llvm::Intrinsic::sadd_with_overflow, Intrinsic_Tys); if (func_sonicadd_overflow == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Cannot get the llvm::Intrinsic::sadd_with_overflow function!\n"))); } llvm::Value* aggres = builder.CreateCall(func_sonicadd_overflow, {resval, midval}); llvm::Value* oflag = builder.CreateExtractValue(aggres, 1); builder.CreateCondBr(oflag, bioverflow_bb, normal_bb); /* make numeric64 when meet overflow */ builder.SetInsertPoint(bioverflow_bb); exprscale = GetAlignedScale(estate->expr); llvm::Value* ascale = llvmCodeGen->getIntConstant(CHAROID, exprscale); llvm::Value* bioverres = WrapmakeNumeric64CodeGen(&builder, resval, ascale); builder.CreateBr(numsum_bb); builder.SetInsertPoint(numsum_bb); llvm::PHINode* numres = builder.CreatePHI(int64Type, 2); numres->addIncoming(result, flag_then[i]); numres->addIncoming(bioverres, bioverflow_bb); llvm::Value* evalval = (llvm::Value*)numres; llvm::Function* func_vsnumericavg = llvmCodeGen->module()->getFunction("Jitted_sonic_numericavg"); if (NULL == func_vsnumericavg) { func_vsnumericavg = vsnumeric_avg_codegen(aggref); } sdata[i] = builder.CreateBitCast(sdata[i], sonicEncodingDatumArrayPtrType); builder.CreateCall(func_vsnumericavg, {sdata[i], scount, locidx, evalval}); builder.CreateBr(agg_end); /* if there is no overflow, extract result directly */ builder.SetInsertPoint(normal_bb); llvm::Value* sumval = builder.CreateExtractValue(aggres, 0); builder.CreateAlignedStore(sumval, sonicptr, 8); /* cell->m_val[idx+1].val++ */ tmpval = builder.CreateAlignedLoad(cntptr, 8, "count"); 
tmpval = builder.CreateAdd(tmpval, int64_1); builder.CreateAlignedStore(tmpval, cntptr, 8); builder.CreateBr(agg_end); builder.SetInsertPoint(agg_end); } else { llvm::Function* func_vsnumericavg = llvmCodeGen->module()->getFunction("Jitted_sonic_numericavg"); if (NULL == func_vsnumericavg) { func_vsnumericavg = vsnumeric_avg_codegen(aggref); } sdata[i] = builder.CreateBitCast(sdata[i], sonicEncodingDatumArrayPtrType); builder.CreateCall(func_vsnumericavg, {sdata[i], scount, locidx, result}); } } break; default: ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), errmodule(MOD_LLVM), errmsg("Unsupported agg function %u!", aggref->aggfnoid))); break; } if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); /* if the current flag is null, turn to next agg */ builder.SetInsertPoint(flag_else[i]); if (i == numaggs - 1) builder.CreateBr(for_inc); else builder.CreateBr(agg_bb[i + 1]); } } /* codegen in the for_inc basic block: compare the loop index with nrows */ builder.SetInsertPoint(for_inc); tmpval = builder.CreateTrunc(idx_next, int32Type); tmpval = builder.CreateICmpEQ(tmpval, nValues); builder.CreateCondBr(tmpval, for_end, for_body); /* codegen in for_end basic block: just return void */ builder.SetInsertPoint(for_end); (void)VecExprCodeGen::MemCxtSwitToCodeGen(&builder, agg_oldcontext); WrapResetEContextCodeGen(&builder, mecontext); WrapResetEContextCodeGen(&builder, econtext); builder.CreateRetVoid(); pfree_ext(aggIdxList); pfree_ext(sdata); pfree_ext(agg_bb); pfree_ext(flag_then); pfree_ext(flag_else); pfree_ext(batch_vals); pfree_ext(batch_flag); llvmCodeGen->FinalizeFunction(jitted_sonicbatchagg, node->ss.ps.plan->plan_node_id); return jitted_sonicbatchagg; } void VecHashAggCodeGen::WarpAllocHashSlotCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* hAggRunner, llvm::Value* batch, llvm::Value* idx, llvm::Value* keysimple) { GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; /* Define data types 
and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_TYPE(int32Type, INT4OID); DEFINE_CG_PTRTYPE(vectorBatchPtrType, "class.VectorBatch"); DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner"); llvm::Function* func_allochashslot = llvmCodeGen->module()->getFunction("IRAllocHashSlot"); if (NULL == func_allochashslot) { GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "IRAllocHashSlot", voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("hAggRunner", hashAggRunnerPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("batch", vectorBatchPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("idx", int32Type)); fn_prototype.addArgument(GsCodeGen::NamedVariable("keysimple", int32Type)); func_allochashslot = fn_prototype.generatePrototype(NULL, NULL); llvm::sys::DynamicLibrary::AddSymbol("IRAllocHashSlot", (void*)WrapAllocHashSlot); } idx = ptrbuilder->CreateTrunc(idx, int32Type); ptrbuilder->CreateCall(func_allochashslot, {hAggRunner, batch, idx, keysimple}); return; } void VecHashAggCodeGen::WarpSglTblAllocHashSlotCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* hAggRunner, llvm::Value* batch, llvm::Value* idx, llvm::Value* keysimple) { GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; /* Define data types and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_TYPE(int32Type, INT4OID); DEFINE_CG_PTRTYPE(vectorBatchPtrType, "class.VectorBatch"); DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner"); llvm::Function* func_sgltblallochashslot = llvmCodeGen->module()->getFunction("IRSglTblAllocHashSlot"); if (NULL == func_sgltblallochashslot) { GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "IRSglTblAllocHashSlot", voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("hAggRunner", hashAggRunnerPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("batch", vectorBatchPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("idx", int32Type)); 
fn_prototype.addArgument(GsCodeGen::NamedVariable("keysimple", int32Type)); func_sgltblallochashslot = fn_prototype.generatePrototype(NULL, NULL); llvm::sys::DynamicLibrary::AddSymbol("IRSglTblAllocHashSlot", (void*)WrapSglTblAllocHashSlot); } idx = ptrbuilder->CreateTrunc(idx, int32Type); ptrbuilder->CreateCall(func_sgltblallochashslot, {hAggRunner, batch, idx, keysimple}); return; } void VecHashAggCodeGen::WrapResetEContextCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* econtext) { GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; /* Define data types and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_PTRTYPE(ExprContextPtrType, "struct.ExprContext"); /* Associate with ResetExprContext */ llvm::Function* func_resetec = llvmCodeGen->module()->getFunction("IRResetExprContext"); if (NULL == func_resetec) { GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "IRResetExprContext", voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("econtext", ExprContextPtrType)); func_resetec = fn_prototype.generatePrototype(NULL, NULL); llvm::sys::DynamicLibrary::AddSymbol("IRResetExprContext", (void*)WrapResetExprContext); } ptrbuilder->CreateCall(func_resetec, econtext); return; } /* * @Description : Generate the function to prefetch the hash cell ahead * of 2, which means hint fetch loc[i+2]. This function * is called by BatchAggregation. * @return : return llvm prefetch function for batchaggregation. 
*/
llvm::Function* prefetchBatchAggregationCodeGen()
{
    Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj);
    GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;

    /* Get LLVM Context and builder */
    llvm::LLVMContext& context = llvmCodeGen->context();
    GsCodeGen::LlvmBuilder builder(context);

    llvm::Value* tmpval = NULL;
    llvm::Value* llvmargs[3];
    llvm::Function* jitted_batchagg_prefetch = NULL;

    /* Define data types and some llvm consts */
    DEFINE_CG_VOIDTYPE(voidType);
    DEFINE_CG_TYPE(int64Type, INT8OID);
    DEFINE_CG_PTRTYPE(int8PtrType, CHAROID);
    DEFINE_CG_PTRTYPE(hashCellPtrType, "struct.hashCell");
    llvm::Type* hashCellPtrPtrType = llvmCodeGen->getPtrType(hashCellPtrType);
    DEFINE_CGVAR_INT32(int32_0, 0);
    DEFINE_CGVAR_INT32(int32_1, 1);
    DEFINE_CGVAR_INT32(int32_3, 3);
    DEFINE_CGVAR_INT64(int64_distance, PREFETCH_BATCHAGGREGATION_DISTANCE);

    /* void prefetchBatchAggregation(hashCell** Loc, int64 Idx, int64 nrows) */
    GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "prefetchBatchAggregation", voidType);
    fn_prototype.addArgument(GsCodeGen::NamedVariable("Loc", hashCellPtrPtrType));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("Idx", int64Type));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("nrows", int64Type));
    jitted_batchagg_prefetch = fn_prototype.generatePrototype(&builder, &llvmargs[0]);

    llvm::Value* Loc = llvmargs[0];
    llvm::Value* idx = llvmargs[1];
    llvm::Value* nrows = llvmargs[2];

    DEFINE_BLOCK(prefetch_bb, jitted_batchagg_prefetch);
    DEFINE_BLOCK(ret_bb, jitted_batchagg_prefetch);

    /* only prefetch when Idx + distance is still a valid row */
    llvm::Value* prefetchIdx = builder.CreateAdd(idx, int64_distance);
    tmpval = builder.CreateICmpSLT(prefetchIdx, nrows);
    builder.CreateCondBr(tmpval, prefetch_bb, ret_bb);

    builder.SetInsertPoint(prefetch_bb);
    llvm::Function* fn_prefetch = llvm::Intrinsic::getDeclaration(llvmCodeGen->module(), llvm::Intrinsic::prefetch);
    if (fn_prefetch == NULL) {
        /* tag the report with MOD_LLVM, as the other prefetch generators do */
        ereport(ERROR,
            (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED),
                errmodule(MOD_LLVM),
                errmsg("Failed to get llvm function Intrinsic::prefetch!\n")));
    }

    /* load Loc[Idx + distance] and hint a prefetch of the cell it points to */
    tmpval = builder.CreateInBoundsGEP(Loc, prefetchIdx);
    tmpval = builder.CreateAlignedLoad(tmpval, 8, "hashCell");
    tmpval = builder.CreateBitCast(tmpval, int8PtrType);
    builder.CreateCall(fn_prefetch, {tmpval, int32_0, int32_3, int32_1});
    builder.CreateBr(ret_bb);

    builder.SetInsertPoint(ret_bb);
    builder.CreateRetVoid();

    llvmCodeGen->FinalizeFunction(jitted_batchagg_prefetch);
    return jitted_batchagg_prefetch;
}

/*
 * @Description : Generate the function "prefetchAggHashing(hAggRunner, Idx, nrows)"
 *                used while matching keys against the segmented hash table.
 *                When Idx + 4 < nrows it hints a prefetch of the bucket slot
 *                m_hashData[nsegs].tbl_data[pos] for m_cacheLoc[i+4], and of
 *                the hash cell stored in the bucket for m_cacheLoc[i+2]
 *                (skipped when that bucket is empty), where
 *                m_cacheLoc[k] = m_hashVal[k] & (m_hashSize - 1),
 *                nsegs = m_cacheLoc[k] / m_hashseg_max and
 *                pos = m_cacheLoc[k] % m_hashseg_max.
 * @return      : return llvm function for building hash table in batchagg.
 */
llvm::Function* prefetchAggHashingCodeGen()
{
    Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj);
    GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;

    /* Get LLVM Context and builder */
    llvm::LLVMContext& context = llvmCodeGen->context();
    GsCodeGen::LlvmBuilder builder(context);

    llvm::Value* tmpval = NULL;
    llvm::Value* llvmargs[3];
    llvm::Function* jitted_hashing_prefetch = NULL;

    /* Define data types and some llvm consts */
    DEFINE_CG_VOIDTYPE(voidType);
    DEFINE_CG_TYPE(int32Type, INT4OID);
    DEFINE_CG_TYPE(int64Type, INT8OID);
    DEFINE_CG_PTRTYPE(int8PtrType, CHAROID);
    DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner");
    DEFINE_CGVAR_INT32(int32_0, 0);
    DEFINE_CGVAR_INT32(int32_1, 1);
    DEFINE_CGVAR_INT32(int32_3, 3);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hashVal, pos_hAggR_hashVal);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hSegTbl, pos_hAggR_hSegTbl);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hsegmax, pos_hAggR_hsegmax);
    DEFINE_CGVAR_INT32(int32_pos_hAggR_hashSize, pos_hAggR_hashSize);
    DEFINE_CGVAR_INT64(int64_0, 0);
    DEFINE_CGVAR_INT64(int64_1, 1);
    DEFINE_CGVAR_INT64(int64_distance1, PREFETCH_AGGHASHING_DISTANCE1);
    DEFINE_CGVAR_INT64(int64_distance2, PREFETCH_AGGHASHING_DISTANCE2);
    llvm::Value* Vals2[2] = {int64_0, int32_0};
    llvm::Value* Vals3[3] = {int64_0, int32_0, int32_0};

    /* void prefetchAggHashing(HashAggRunner* hAggRunner, int64 Idx, int64 nrows) */
    GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "prefetchAggHashing", voidType);
    fn_prototype.addArgument(GsCodeGen::NamedVariable("hAggRunner", hashAggRunnerPtrType));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("Idx", int64Type));
    fn_prototype.addArgument(GsCodeGen::NamedVariable("nrows", int64Type));
    jitted_hashing_prefetch = fn_prototype.generatePrototype(&builder, &llvmargs[0]);

    llvm::Value* hAggRunner = llvmargs[0];
    llvm::Value* idx = llvmargs[1];
    llvm::Value* nrows = llvmargs[2];

    DEFINE_BLOCK(prefetch_hashData_bb, jitted_hashing_prefetch);
    DEFINE_BLOCK(prefetch_cell_bb, jitted_hashing_prefetch);
    DEFINE_BLOCK(ret_bb, jitted_hashing_prefetch);

    /*
     * try to prefetch hashData[m_cacheLoc[i+4]] : only do this when we have
     * enough rows.
     */
    llvm::Value* prefetchIdx = builder.CreateAdd(idx, int64_distance1);
    tmpval = builder.CreateICmpSLT(prefetchIdx, nrows);
    builder.CreateCondBr(tmpval, prefetch_hashData_bb, ret_bb);

    builder.SetInsertPoint(prefetch_hashData_bb);

    /* mask = hashAggRunner.m_hashSize - 1 */
    Vals2[1] = int32_pos_hAggR_hashSize;
    tmpval = builder.CreateInBoundsGEP(hAggRunner, Vals2);
    llvm::Value* maskval = builder.CreateAlignedLoad(tmpval, 8, "m_hashSize");
    maskval = builder.CreateSub(maskval, int64_1, "mask");

    /* calculate m_cacheLoc[i+4] = m_hashVal[i+4] & mask */
    llvm::Value* next_hashData_idx = builder.CreateAdd(idx, int64_distance1);
    Vals3[1] = int32_pos_hAggR_hashVal;
    Vals3[2] = next_hashData_idx;
    llvm::Value* hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3);
    llvm::Value* hashVal64 = builder.CreateAlignedLoad(hashValSlot, 8, "hashVal");
    llvm::Value* cacheLocVal = builder.CreateAnd(hashVal64, maskval);

    /* get hashData = hashAggRunner->m_hashData.tbl_data */
    /* calculate nsegs and pos according to m_cacheLoc[i+4] */
    Vals2[0] = int64_0;
    Vals2[1] = int32_pos_hAggR_hsegmax;
    llvm::Value* segmaxval = builder.CreateInBoundsGEP(hAggRunner, Vals2);
    segmaxval = builder.CreateAlignedLoad(segmaxval, 4, "segmax");
    segmaxval = builder.CreateSExt(segmaxval, int64Type);

    /*
     * nsegs = m_cacheLoc[i+4] / m_hashseg_max
     *
     * This must be a plain udiv: an 'exact' udiv yields a poison value
     * whenever the dividend is not a multiple of the divisor, and the
     * remainder of this very division (pos, below) is expected to be
     * non-zero in general.
     */
    llvm::Value* nsegsval = builder.CreateUDiv(cacheLocVal, segmaxval, "nsegs");
    nsegsval = builder.CreateTrunc(nsegsval, int32Type);

    /* pos = m_cacheLoc[i+4] % m_hashseg_max */
    llvm::Value* pos = builder.CreateSRem(cacheLocVal, segmaxval, "pos");

    /* get m_hashData from hashAggRunner */
    Vals2[0] = int64_0;
    Vals2[1] = int32_pos_hAggR_hSegTbl;
    llvm::Value* hashData = builder.CreateInBoundsGEP(hAggRunner, Vals2);
    hashData = builder.CreateAlignedLoad(hashData, 8, "m_hashData");

    /* address of m_hashData[nsegs].tbl_data[pos] */
    Vals2[0] = nsegsval;
    Vals2[1] = int32_1;
    llvm::Value* tbldata = builder.CreateInBoundsGEP(hashData, Vals2);
    tbldata = builder.CreateAlignedLoad(tbldata, 8, "tbl_data");
    llvm::Value* next_hashData_addr = builder.CreateInBoundsGEP(tbldata, pos);

    llvm::Function* fn_prefetch = llvm::Intrinsic::getDeclaration(llvmCodeGen->module(), llvm::Intrinsic::prefetch);
    if (fn_prefetch == NULL) {
        ereport(ERROR,
            (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED),
                errmodule(MOD_LLVM),
                errmsg("Failed to get function Intrinsic::prefetch!\n")));
    }

    tmpval = builder.CreateBitCast(next_hashData_addr, int8PtrType);
    builder.CreateCall(fn_prefetch, {tmpval, int32_0, int32_3, int32_1});

    /* calculate m_cacheLoc[i+2] = m_hashVal[i+2] & mask */
    llvm::Value* next_cell_idx = builder.CreateAdd(idx, int64_distance2);
    Vals3[1] = int32_pos_hAggR_hashVal;
    Vals3[2] = next_cell_idx;
    hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3);
    hashVal64 = builder.CreateAlignedLoad(hashValSlot, 8, "hashVal");
    cacheLocVal = builder.CreateAnd(hashVal64, maskval);

    /* nsegs = m_cacheLoc[i+2] / m_hashseg_max (plain udiv, see above) */
    llvm::Value* nsegsval2 = builder.CreateUDiv(cacheLocVal, segmaxval, "nsegs");
    nsegsval2 = builder.CreateTrunc(nsegsval2, int32Type);

    /* pos = m_cacheLoc[i+2] % m_hashseg_max */
    llvm::Value* pos2 = builder.CreateSRem(cacheLocVal, segmaxval, "pos");

    /* load the cell pointer stored in m_hashData[nsegs].tbl_data[pos] */
    Vals2[0] = nsegsval2;
    Vals2[1] = int32_1;
    llvm::Value* tbldata2 = builder.CreateInBoundsGEP(hashData, Vals2);
    tbldata2 = builder.CreateAlignedLoad(tbldata2, 8, "tbl_data");
    tmpval = builder.CreateInBoundsGEP(tbldata2, pos2);
    llvm::Value* next_cell = builder.CreateAlignedLoad(tmpval, 8, "cell");

    /* nothing to prefetch when the bucket holds a NULL cell */
    tmpval = builder.CreatePtrToInt(next_cell, int64Type);
    tmpval = builder.CreateICmpEQ(tmpval, int64_0);
    builder.CreateCondBr(tmpval, ret_bb, prefetch_cell_bb);

    builder.SetInsertPoint(prefetch_cell_bb);
    next_cell = builder.CreateBitCast(next_cell, int8PtrType);
    builder.CreateCall(fn_prefetch, {next_cell, int32_0, int32_3, int32_1});
    builder.CreateBr(ret_bb);

    builder.SetInsertPoint(ret_bb);
    builder.CreateRetVoid();

    llvmCodeGen->FinalizeFunction(jitted_hashing_prefetch);
    return jitted_hashing_prefetch;
}

/*
 * @Description : Generate the function "prefetchAggSglTblHashing(hAggRunner, Idx, nrows)",
 *                the variant of prefetchAggHashing that always looks in
 *                segment 0 of the hash table, so no segment number or
 *                in-segment position has to be computed. When Idx + 4 < nrows
 *                it hints a prefetch of the bucket slot
 *                m_hashData[0].tbl_data[m_cacheLoc[i+4]] and of the hash cell
 *                stored in m_hashData[0].tbl_data[m_cacheLoc[i+2]] (skipped
 *                when that bucket is empty). This function is called in the
 *                loop of the matching key.
 * @return      : return llvm function for building hash table in batchagg.
*/ llvm::Function* prefetchAggSglTblHashingCodeGen() { Assert(NULL != (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj); GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; /* Get LLVM Context and builder */ llvm::LLVMContext& context = llvmCodeGen->context(); GsCodeGen::LlvmBuilder builder(context); llvm::Value* tmpval = NULL; llvm::Value* llvmargs[3]; llvm::Function* jitted_hashing_prefetch = NULL; /* Define data types and some llvm consts */ DEFINE_CG_VOIDTYPE(voidType); DEFINE_CG_TYPE(int64Type, INT8OID); DEFINE_CG_PTRTYPE(int8PtrType, CHAROID); DEFINE_CG_PTRTYPE(hashAggRunnerPtrType, "class.HashAggRunner"); DEFINE_CGVAR_INT32(int32_0, 0); DEFINE_CGVAR_INT32(int32_1, 1); DEFINE_CGVAR_INT32(int32_3, 3); DEFINE_CGVAR_INT32(int32_pos_hAggR_hashVal, pos_hAggR_hashVal); DEFINE_CGVAR_INT32(int32_pos_hAggR_hSegTbl, pos_hAggR_hSegTbl); DEFINE_CGVAR_INT32(int32_pos_hAggR_hashSize, pos_hAggR_hashSize); DEFINE_CGVAR_INT64(int64_0, 0); DEFINE_CGVAR_INT64(int64_1, 1); DEFINE_CGVAR_INT64(int64_distance1, PREFETCH_AGGHASHING_DISTANCE1); DEFINE_CGVAR_INT64(int64_distance2, PREFETCH_AGGHASHING_DISTANCE2); llvm::Value* Vals2[2] = {int64_0, int32_0}; llvm::Value* Vals3[3] = {int64_0, int32_0, int32_0}; GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "prefetchAggSglTblHashing", voidType); fn_prototype.addArgument(GsCodeGen::NamedVariable("hAggRunner", hashAggRunnerPtrType)); fn_prototype.addArgument(GsCodeGen::NamedVariable("Idx", int64Type)); fn_prototype.addArgument(GsCodeGen::NamedVariable("nrows", int64Type)); jitted_hashing_prefetch = fn_prototype.generatePrototype(&builder, &llvmargs[0]); llvm::Value* hAggRunner = llvmargs[0]; llvm::Value* idx = llvmargs[1]; llvm::Value* nrows = llvmargs[2]; DEFINE_BLOCK(prefetch_hashData_bb, jitted_hashing_prefetch); DEFINE_BLOCK(prefetch_cell_bb, jitted_hashing_prefetch); DEFINE_BLOCK(ret_bb, jitted_hashing_prefetch); /* * try to prefetch hashData[m_cacheLoc[i+4]] : only do this when we have * enough rows. 
*/ llvm::Value* prefetchIdx = builder.CreateAdd(idx, int64_distance1); tmpval = builder.CreateICmpSLT(prefetchIdx, nrows); builder.CreateCondBr(tmpval, prefetch_hashData_bb, ret_bb); builder.SetInsertPoint(prefetch_hashData_bb); /* mask = hashAggRunner.m_hashSize - 1 */ Vals2[1] = int32_pos_hAggR_hashSize; tmpval = builder.CreateInBoundsGEP(hAggRunner, Vals2); llvm::Value* maskval = builder.CreateAlignedLoad(tmpval, 8, "m_hashSize"); maskval = builder.CreateSub(maskval, int64_1, "mask"); /* calculate m_cacheLoc[i+4] = m_hashVal[i+4] & mask */ llvm::Value* next_hashData_idx = builder.CreateAdd(idx, int64_distance1); Vals3[1] = int32_pos_hAggR_hashVal; Vals3[2] = next_hashData_idx; llvm::Value* hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3); llvm::Value* hashVal64 = builder.CreateAlignedLoad(hashValSlot, 8, "hashVal"); llvm::Value* cacheLocVal = builder.CreateAnd(hashVal64, maskval); /* get m_hashData from hashAggRunner */ Vals2[0] = int64_0; Vals2[1] = int32_pos_hAggR_hSegTbl; llvm::Value* hashData = builder.CreateInBoundsGEP(hAggRunner, Vals2); hashData = builder.CreateAlignedLoad(hashData, 8, "m_hashData"); /* calculate address of m_hashData[0].tbl_data[m_cacheLoc[i+4]] */ Vals2[0] = int64_0; Vals2[1] = int32_1; llvm::Value* tbldata = builder.CreateInBoundsGEP(hashData, Vals2); tbldata = builder.CreateAlignedLoad(tbldata, 8, "tbl_data"); llvm::Value* next_hashData_addr = builder.CreateInBoundsGEP(tbldata, cacheLocVal); llvm::Function* fn_prefetch = llvm::Intrinsic::getDeclaration(llvmCodeGen->module(), llvm::Intrinsic::prefetch); if (fn_prefetch == NULL) { ereport(ERROR, (errcode(ERRCODE_LOAD_INTRINSIC_FUNCTION_FAILED), errmodule(MOD_LLVM), errmsg("Failed to get function Intrinsic::prefetch!\n"))); } tmpval = builder.CreateBitCast(next_hashData_addr, int8PtrType); builder.CreateCall(fn_prefetch, {tmpval, int32_0, int32_3, int32_1}); /* calculate m_cacheLoc[i+2] = m_hashVal[i+2] & mask */ llvm::Value* next_cell_idx = builder.CreateAdd(idx, 
int64_distance2); Vals3[1] = int32_pos_hAggR_hashVal; Vals3[2] = next_cell_idx; hashValSlot = builder.CreateInBoundsGEP(hAggRunner, Vals3); hashVal64 = builder.CreateAlignedLoad(hashValSlot, 8, "hashVal"); cacheLocVal = builder.CreateAnd(hashVal64, maskval); llvm::Value* next_cell = builder.CreateInBoundsGEP(tbldata, cacheLocVal); next_cell = builder.CreateAlignedLoad(next_cell, 8, "cell"); tmpval = builder.CreatePtrToInt(next_cell, int64Type); tmpval = builder.CreateICmpEQ(tmpval, int64_0); builder.CreateCondBr(tmpval, ret_bb, prefetch_cell_bb); builder.SetInsertPoint(prefetch_cell_bb); next_cell = builder.CreateBitCast(next_cell, int8PtrType); builder.CreateCall(fn_prefetch, {next_cell, int32_0, int32_3, int32_1}); builder.CreateBr(ret_bb); builder.SetInsertPoint(ret_bb); builder.CreateRetVoid(); llvmCodeGen->FinalizeFunction(jitted_hashing_prefetch); return jitted_hashing_prefetch; } } // namespace dorado /** * @Description : If current hashcell is not matched, allocate a new hash cell and * initialize the hash value. * @in haRunner : HashAggRunner data structure. * @in batch : Batch value used to initialize the hash cell. * @in idx : The row index of current tuple in batch. * @in keysimple : the match key is simple when true. */ void WrapAllocHashSlot(HashAggRunner* haRunner, VectorBatch* batch, int idx, int keysimple) { if (keysimple) haRunner->AllocHashSlot(batch, idx); else haRunner->AllocHashSlot(batch, idx); } /** * @Description : If current hashcell is not matched, allocate a new hash cell and * initialize the hash value with only one segment hash table case. * @in haRunner : HashAggRunner data structure. * @in batch : Batch value used to initialize the hash cell. * @in idx : The row index of current tuple in batch. * @in keysimple : the match key is simple when true. 
*/
void WrapSglTblAllocHashSlot(HashAggRunner* haRunner, VectorBatch* batch, int idx, int keysimple)
{
    /*
     * NOTE(review): both branches issue the same call, so 'keysimple' has no
     * effect as written. Presumably the two calls once differed (e.g. in
     * explicit template arguments) -- confirm against the declaration of
     * HashAggRunner::AllocHashSlot before simplifying.
     */
    if (keysimple != 0) {
        haRunner->AllocHashSlot(batch, idx);
    } else {
        haRunner->AllocHashSlot(batch, idx);
    }
}

/**
 * @Description : Native target of the IR symbol "IRResetExprContext": reset
 *                the given expr context. A NULL context is ignored.
 * @econtext    : Current expr context, may be NULL.
 */
void WrapResetExprContext(ExprContext* econtext)
{
    if (econtext == NULL) {
        return;
    }

    ResetExprContext(econtext);
}