!1928 修复向量化引擎场景下,length结果不一致的问题

Merge pull request !1928 from pengjiong/array
This commit is contained in:
opengauss-bot
2022-07-13 01:42:04 +00:00
committed by Gitee
4 changed files with 134 additions and 8 deletions

View File

@ -1787,12 +1787,13 @@ ScalarVector* vbpcharlen(PG_FUNCTION_ARGS)
int len;
int eml;
eml = pg_database_encoding_max_length();
bool getTrueLen = DB_IS_CMPT(PG_FORMAT | B_FORMAT);
if (pselection != NULL) {
for (k = 0; k < nvalues; k++) {
if (pselection[k]) {
if (NOT_NULL(vflag[k])) {
len = VARSIZE_ANY_EXHDR(varg->m_vals[k]);
len = getTrueLen ? bcTruelen((BpChar*)varg->m_vals[k]) : VARSIZE_ANY_EXHDR(varg->m_vals[k]);
if (eml != 1)
len = pg_mbstrlen_with_len_eml(VARDATA_ANY(varg->m_vals[k]), len, eml);
vresult->m_vals[k] = Int32GetDatum(len);
@ -1805,7 +1806,7 @@ ScalarVector* vbpcharlen(PG_FUNCTION_ARGS)
} else {
for (k = 0; k < nvalues; k++) {
if (NOT_NULL(vflag[k])) {
len = VARSIZE_ANY_EXHDR(varg->m_vals[k]);
len = getTrueLen ? bcTruelen((BpChar*)varg->m_vals[k]) : VARSIZE_ANY_EXHDR(varg->m_vals[k]);
if (eml != 1)
len = pg_mbstrlen_with_len_eml(VARDATA_ANY(varg->m_vals[k]), len, eml);
vresult->m_vals[k] = Int32GetDatum(len);

View File

@ -48,12 +48,32 @@ int Wrapmbstrlen(int str_len, char* str_data)
}
/*
 * @Description : Simplified counterpart of bcTruelen that works on a raw
 *                byte buffer: computes the length of the data once the
 *                trailing blanks (' ') have been ignored.
 * @in str_len  : the length of str_data in bytes.
 * @in str_data : the character data to inspect; it is only read for
 *                indexes in [0, str_len).
 * @return      : the "true" length in bytes (not counting trailing blanks);
 *                str_len itself is returned unchanged when it is <= 0.
 */
int WrapBcTrueLen(int str_len, char* str_data)
{
    int trueLen = str_len;

    /* Shrink the length while the last remaining byte is a blank. */
    while (trueLen > 0 && str_data[trueLen - 1] == ' ') {
        --trueLen;
    }

    return trueLen;
}
/*
* @Description : Wrap the function at 'funcAddr' as an LLVM call, used to compute a string length with a specific method
* @in ptrbuilder : LLVM builder structure used to call the IR function.
* @in strlen : the length of the cstring in LLVM assembly.
* @in strdata : the cstring data in LLVM assembly.
* @in funcName : name under which the function is looked up and registered in the LLVM module
* @in funcAddr : address of the native function bound to funcName
*/
llvm::Value* WrapmbstrlenCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* str_len, llvm::Value* str_data)
llvm::Value* WrapstrlenCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value* str_len, llvm::Value* str_data,
const char* funcName, void* funcAddr)
{
GsCodeGen* llvmCodeGen = (GsCodeGen*)t_thrd.codegen_cxt.thr_codegen_obj;
llvm::LLVMContext& context = llvmCodeGen->context();
@ -64,13 +84,13 @@ llvm::Value* WrapmbstrlenCodeGen(GsCodeGen::LlvmBuilder* ptrbuilder, llvm::Value
llvm::Value* result = NULL;
llvm::Function* jitted_wrapmbstrlen = llvmCodeGen->module()->getFunction("LLVMIRWrapmbstrlen");
llvm::Function* jitted_wrapmbstrlen = llvmCodeGen->module()->getFunction(funcName);
if (jitted_wrapmbstrlen == NULL) {
GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, "LLVMIRWrapmbstrlen", int32Type);
GsCodeGen::FnPrototype fn_prototype(llvmCodeGen, funcName, int32Type);
fn_prototype.addArgument(GsCodeGen::NamedVariable("str_len", int32Type));
fn_prototype.addArgument(GsCodeGen::NamedVariable("str_data", int8PtrType));
jitted_wrapmbstrlen = fn_prototype.generatePrototype(NULL, NULL);
llvm::sys::DynamicLibrary::AddSymbol("LLVMIRWrapmbstrlen", (void*)Wrapmbstrlen);
llvm::sys::DynamicLibrary::AddSymbol(funcName, funcAddr);
}
result = ptrbuilder->CreateCall(jitted_wrapmbstrlen, {str_len, str_data});
@ -243,9 +263,14 @@ llvm::Function* bpcharlen_codegen(int current_encoding)
llvm::Value* lhs_val = builder.CreateCall(func_varlena, lhs_arg, "lval");
str_len = builder.CreateExtractValue(lhs_val, 0);
if (DB_IS_CMPT(PG_FORMAT | B_FORMAT)) {
str_data = builder.CreateExtractValue(lhs_val, 1);
str_len = WrapstrlenCodeGen(&builder, str_len, str_data, "LLVMIRWrapBcTrueLen", (void*)WrapBcTrueLen);
}
if (current_encoding == PG_UTF8) {
str_data = builder.CreateExtractValue(lhs_val, 1);
str_len = WrapmbstrlenCodeGen(&builder, str_len, str_data);
str_len = WrapstrlenCodeGen(&builder, str_len, str_data, "LLVMIRWrapmbstrlen", (void*)Wrapmbstrlen);
}
str_len = builder.CreateZExt(str_len, int64Type);

View File

@ -260,3 +260,73 @@ drop table force_vector_test5;
drop table force_vector_test6;
drop function func_add_sql;
drop schema force_vector_engine cascade;
-- test bpcharlen in pg compatibility
create database pg_length_cmpt_db with dbcompatibility 'PG';
\c pg_length_cmpt_db
set try_vector_engine_strategy='force';
create table force_vt_tb1 (a char(10),b varchar(10));
insert into force_vt_tb1 values('零0','零1二3');
insert into force_vt_tb1 values('','');
insert into force_vt_tb1 values('0','0');
explain(costs off) select length(a),a from force_vt_tb1 order by 1;
QUERY PLAN
----------------------------------------------
Row Adapter
-> Vector Sort
Sort Key: (length(a))
-> Vector Adapter(type: BATCH MODE)
-> Seq Scan on force_vt_tb1
(5 rows)
select length(a),a,length(b),b from force_vt_tb1 order by 1;
length | a | length | b
--------+------------+--------+--------
0 | | 0 |
1 | 0 | 1 | 0
2 | 零0 | 4 | 零1二3
(3 rows)
set try_vector_engine_strategy='off';
explain(costs off) select length(a),a from force_vt_tb1 order by 1;
QUERY PLAN
--------------------------------
Sort
Sort Key: (length(a))
-> Seq Scan on force_vt_tb1
(3 rows)
select length(a),a,length(b),b from force_vt_tb1 order by 1;
length | a | length | b
--------+------------+--------+--------
0 | | 0 |
1 | 0 | 1 | 0
2 | 零0 | 4 | 零1二3
(3 rows)
-- test with column table in codegen case
create table force_vt_tb1_col (a char(10),b varchar(10)) with (orientation=column);
insert into force_vt_tb1_col values('零0','零1二3');
insert into force_vt_tb1_col values('','');
insert into force_vt_tb1_col values('0','0');
select length(a),a,length(b),b from force_vt_tb1 order by 1;
length | a | length | b
--------+------------+--------+--------
0 | | 0 |
1 | 0 | 1 | 0
2 | 零0 | 4 | 零1二3
(3 rows)
set enable_codegen to true;
set codegen_cost_threshold to 0;
select length(a),a,length(b),b from force_vt_tb1 order by 1;
length | a | length | b
--------+------------+--------+--------
0 | | 0 |
1 | 0 | 1 | 0
2 | 零0 | 4 | 零1二3
(3 rows)
drop table force_vt_tb1;
drop table force_vt_tb1_col;
\c regression;
drop database pg_length_cmpt_db;

View File

@ -82,3 +82,33 @@ drop table force_vector_test5;
drop table force_vector_test6;
drop function func_add_sql;
drop schema force_vector_engine cascade;
-- test bpcharlen in pg compatibility
create database pg_length_cmpt_db with dbcompatibility 'PG';
\c pg_length_cmpt_db
set try_vector_engine_strategy='force';
create table force_vt_tb1 (a char(10),b varchar(10));
insert into force_vt_tb1 values('零0','零1二3');
insert into force_vt_tb1 values('','');
insert into force_vt_tb1 values('0','0');
explain(costs off) select length(a),a from force_vt_tb1 order by 1;
select length(a),a,length(b),b from force_vt_tb1 order by 1;
set try_vector_engine_strategy='off';
explain(costs off) select length(a),a from force_vt_tb1 order by 1;
select length(a),a,length(b),b from force_vt_tb1 order by 1;
-- test with column table in codegen case
create table force_vt_tb1_col (a char(10),b varchar(10)) with (orientation=column);
insert into force_vt_tb1_col values('零0','零1二3');
insert into force_vt_tb1_col values('','');
insert into force_vt_tb1_col values('0','0');
select length(a),a,length(b),b from force_vt_tb1 order by 1;
set enable_codegen to true;
set codegen_cost_threshold to 0;
select length(a),a,length(b),b from force_vt_tb1 order by 1;
drop table force_vt_tb1;
drop table force_vt_tb1_col;
\c regression;
drop database pg_length_cmpt_db;