diff --git a/src/common/backend/utils/adt/varchar.cpp b/src/common/backend/utils/adt/varchar.cpp index 25126ab85..9b68d9355 100644 --- a/src/common/backend/utils/adt/varchar.cpp +++ b/src/common/backend/utils/adt/varchar.cpp @@ -1787,12 +1787,13 @@ ScalarVector* vbpcharlen(PG_FUNCTION_ARGS) int len; int eml; eml = pg_database_encoding_max_length(); + bool getTrueLen = DB_IS_CMPT(PG_FORMAT | B_FORMAT); if (pselection != NULL) { for (k = 0; k < nvalues; k++) { if (pselection[k]) { if (NOT_NULL(vflag[k])) { - len = VARSIZE_ANY_EXHDR(varg->m_vals[k]); + len = getTrueLen ? bcTruelen((BpChar*)varg->m_vals[k]) : VARSIZE_ANY_EXHDR(varg->m_vals[k]); if (eml != 1) len = pg_mbstrlen_with_len_eml(VARDATA_ANY(varg->m_vals[k]), len, eml); vresult->m_vals[k] = Int32GetDatum(len); @@ -1805,7 +1806,7 @@ ScalarVector* vbpcharlen(PG_FUNCTION_ARGS) } else { for (k = 0; k < nvalues; k++) { if (NOT_NULL(vflag[k])) { - len = VARSIZE_ANY_EXHDR(varg->m_vals[k]); + len = getTrueLen ? bcTruelen((BpChar*)varg->m_vals[k]) : VARSIZE_ANY_EXHDR(varg->m_vals[k]); if (eml != 1) len = pg_mbstrlen_with_len_eml(VARDATA_ANY(varg->m_vals[k]), len, eml); vresult->m_vals[k] = Int32GetDatum(len); diff --git a/src/gausskernel/runtime/codegen/codegenutil/varcharcodegen.cpp b/src/gausskernel/runtime/codegen/codegenutil/varcharcodegen.cpp index 6f492bd46..8849530a7 100644 --- a/src/gausskernel/runtime/codegen/codegenutil/varcharcodegen.cpp +++ b/src/gausskernel/runtime/codegen/codegenutil/varcharcodegen.cpp @@ -48,12 +48,32 @@ int Wrapmbstrlen(int str_len, char* str_data) } /* - * @Description : Wrap the 'Wrapmbstrlen' function in LLVM + * @Description : Simplified bcTruelen for a raw byte buffer: walks back from the last byte while it is a blank. + * @in str_len : the length of str_data in bytes. + * @in str_data : the buffer to measure; only the first str_len bytes are examined. 
+ * @return : "True" length in bytes (not counting trailing blanks); 0 when str_len is 0 or every byte is a blank + */ +int WrapBcTrueLen(int str_len, char* str_data) +{ + int i = str_len - 1; + for (; i >= 0; i--) { + if (str_data[i] != ' ') { + break; + } + } + return i + 1; +} + +/* + * @Description : Emit an LLVM call to the native length function 'funcAddr', declaring it in the module as 'funcName' on first use * @in ptrbuilder : LLVM builder structure used to call the IR function. * @in strlen : the length of the cstring in LLVM assemble. * @in strdata : the cstring data in LLVM assemble. + * @in funcName : name used to look up or declare the function in the LLVM module and to register its symbol + * @in funcAddr : address of the native function registered under funcName (takes str_len and str_data, returns int32) */ -llvm::Value* WrapmbstrlenCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* str_len, llvm::Value* str_data) +llvm::Value* WrapstrlenCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* str_len, llvm::Value* str_data, + const char* funcName, void* funcAddr) { GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj; llvm::LLVMContext& context = llvmCodeGen->context(); @@ -64,13 +84,13 @@ llvm::Value* WrapmbstrlenCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value llvm::Value* result = NULL; - llvm::Function* jitted_wrapmbstrlen = llvmCodeGen->module()->getFunction("LLVMIRWrapmbstrlen"); + llvm::Function* jitted_wrapmbstrlen = llvmCodeGen->module()->getFunction(funcName); if (jitted_wrapmbstrlen == NULL) { - GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "LLVMIRWrapmbstrlen", int32Type); + GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, funcName, int32Type); fn_prototype.addArgument(GsCodeGen::NamedVariable("str_len", int32Type)); fn_prototype.addArgument(GsCodeGen::NamedVariable("str_data", int8PtrType)); jitted_wrapmbstrlen = fn_prototype.generatePrototype(NULL, NULL); - llvm::sys::DynamicLibrary::AddSymbol("LLVMIRWrapmbstrlen", (void*)Wrapmbstrlen); + llvm::sys::DynamicLibrary::AddSymbol(funcName, funcAddr); } result = ptrbuilder->CreateCall(jitted_wrapmbstrlen, {str_len, str_data}); @@ -243,9 +263,14 @@ llvm::Function* bpcharlen_codegen(int 
current_encoding) llvm::Value* lhs_val = builder.CreateCall(func_varlena, lhs_arg, "lval"); str_len = builder.CreateExtractValue(lhs_val, 0); + if (DB_IS_CMPT(PG_FORMAT | B_FORMAT)) { + str_data = builder.CreateExtractValue(lhs_val, 1); + str_len = WrapstrlenCodeGen(&builder, str_len, str_data, "LLVMIRWrapBcTrueLen", (void*)WrapBcTrueLen); + } + if (current_encoding == PG_UTF8) { str_data = builder.CreateExtractValue(lhs_val, 1); - str_len = WrapmbstrlenCodeGen(&builder, str_len, str_data); + str_len = WrapstrlenCodeGen(&builder, str_len, str_data, "LLVMIRWrapmbstrlen", (void*)Wrapmbstrlen); } str_len = builder.CreateZExt(str_len, int64Type); diff --git a/src/test/regress/expected/force_vector_engine.out b/src/test/regress/expected/force_vector_engine.out index bda581480..486f2983a 100644 --- a/src/test/regress/expected/force_vector_engine.out +++ b/src/test/regress/expected/force_vector_engine.out @@ -260,3 +260,73 @@ drop table force_vector_test5; drop table force_vector_test6; drop function func_add_sql; drop schema force_vector_engine cascade; +-- test bpcharlen in pg compatibility +create database pg_length_cmpt_db with dbcompatibility 'PG'; +\c pg_length_cmpt_db +set try_vector_engine_strategy='force'; +create table force_vt_tb1 (a char(10),b varchar(10)); +insert into force_vt_tb1 values('零0','零1二3'); +insert into force_vt_tb1 values('',''); +insert into force_vt_tb1 values('0','0'); +explain(costs off) select length(a),a from force_vt_tb1 order by 1; + QUERY PLAN +---------------------------------------------- + Row Adapter + -> Vector Sort + Sort Key: (length(a)) + -> Vector Adapter(type: BATCH MODE) + -> Seq Scan on force_vt_tb1 +(5 rows) + +select length(a),a,length(b),b from force_vt_tb1 order by 1; + length | a | length | b +--------+------------+--------+-------- + 0 | | 0 | + 1 | 0 | 1 | 0 + 2 | 零0 | 4 | 零1二3 +(3 rows) + +set try_vector_engine_strategy='off'; +explain(costs off) select length(a),a from force_vt_tb1 order by 1; + QUERY PLAN 
+-------------------------------- + Sort + Sort Key: (length(a)) + -> Seq Scan on force_vt_tb1 +(3 rows) + +select length(a),a,length(b),b from force_vt_tb1 order by 1; + length | a | length | b +--------+------------+--------+-------- + 0 | | 0 | + 1 | 0 | 1 | 0 + 2 | 零0 | 4 | 零1二3 +(3 rows) + +-- test with column table in codegen case +create table force_vt_tb1_col (a char(10),b varchar(10)) with (orientation=column); +insert into force_vt_tb1_col values('零0','零1二3'); +insert into force_vt_tb1_col values('',''); +insert into force_vt_tb1_col values('0','0'); +select length(a),a,length(b),b from force_vt_tb1_col order by 1; + length | a | length | b +--------+------------+--------+-------- + 0 | | 0 | + 1 | 0 | 1 | 0 + 2 | 零0 | 4 | 零1二3 +(3 rows) + +set enable_codegen to true; +set codegen_cost_threshold to 0; +select length(a),a,length(b),b from force_vt_tb1_col order by 1; + length | a | length | b +--------+------------+--------+-------- + 0 | | 0 | + 1 | 0 | 1 | 0 + 2 | 零0 | 4 | 零1二3 +(3 rows) + +drop table force_vt_tb1; +drop table force_vt_tb1_col; +\c regression; +drop database pg_length_cmpt_db; diff --git a/src/test/regress/sql/force_vector_engine.sql b/src/test/regress/sql/force_vector_engine.sql index f325b0776..64786c256 100644 --- a/src/test/regress/sql/force_vector_engine.sql +++ b/src/test/regress/sql/force_vector_engine.sql @@ -82,3 +82,33 @@ drop table force_vector_test5; drop table force_vector_test6; drop function func_add_sql; drop schema force_vector_engine cascade; + +-- test bpcharlen in pg compatibility +create database pg_length_cmpt_db with dbcompatibility 'PG'; +\c pg_length_cmpt_db +set try_vector_engine_strategy='force'; +create table force_vt_tb1 (a char(10),b varchar(10)); +insert into force_vt_tb1 values('零0','零1二3'); +insert into force_vt_tb1 values('',''); +insert into force_vt_tb1 values('0','0'); +explain(costs off) select length(a),a from force_vt_tb1 order by 1; +select length(a),a,length(b),b from force_vt_tb1 order by 1; + +set 
try_vector_engine_strategy='off'; +explain(costs off) select length(a),a from force_vt_tb1 order by 1; +select length(a),a,length(b),b from force_vt_tb1 order by 1; + +-- test with column table in codegen case +create table force_vt_tb1_col (a char(10),b varchar(10)) with (orientation=column); +insert into force_vt_tb1_col values('零0','零1二3'); +insert into force_vt_tb1_col values('',''); +insert into force_vt_tb1_col values('0','0'); +select length(a),a,length(b),b from force_vt_tb1_col order by 1; +set enable_codegen to true; +set codegen_cost_threshold to 0; +select length(a),a,length(b),b from force_vt_tb1_col order by 1; + +drop table force_vt_tb1; +drop table force_vt_tb1_col; +\c regression; +drop database pg_length_cmpt_db;