From 97f9f7d20cd1f8541530c5ff0bae1f84fdec301e Mon Sep 17 00:00:00 2001 From: li-judong Date: Mon, 26 Jun 2023 19:52:41 +0800 Subject: [PATCH] =?UTF-8?q?1=E3=80=81=E8=80=81=E9=97=AE=E9=A2=98=EF=BC=9A?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=88=86=E5=8C=BA=E5=89=AA=E6=9E=9D=E3=80=81?= =?UTF-8?q?=E5=88=86=E5=8C=BA=E8=B7=AF=E7=94=B1=E6=9C=AA=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E6=AD=A3=E7=A1=AE=E7=9A=84collation=E5=AF=BC=E8=87=B4=E7=9A=84?= =?UTF-8?q?core=E9=97=AE=E9=A2=98=E5=92=8C=E5=89=AA=E6=9E=9D=E9=97=AE?= =?UTF-8?q?=E9=A2=98=20-=20=E4=BF=AE=E5=A4=8Drange=E3=80=81list=E5=88=86?= =?UTF-8?q?=E5=8C=BA=E9=94=AE=E6=AF=94=E8=BE=83=E6=97=B6=E6=9C=AA=E6=8C=87?= =?UTF-8?q?=E5=AE=9A=E5=AD=97=E7=AC=A6=E5=BA=8F=E7=9A=84=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E3=80=82=20-=20=E4=BF=AE=E5=A4=8Dhash=E5=88=86=E5=8C=BA?= =?UTF-8?q?=E9=94=AE=E8=AE=A1=E7=AE=97hashvalue=E6=9C=AA=E4=BD=BF=E7=94=A8?= =?UTF-8?q?collation=E7=9A=84=E5=9C=BA=E6=99=AF=E3=80=82=202=E3=80=81?= =?UTF-8?q?=E4=BF=AE=E5=A4=8Dhash=E5=88=86=E5=8C=BA=E5=89=AA=E6=9E=9D?= =?UTF-8?q?=E6=97=B6=EF=BC=8Cconst->constcollid=E4=B8=8E=E5=88=86=E5=8C=BA?= =?UTF-8?q?=E9=94=AE=E4=B8=8D=E5=90=8C=E5=AF=BC=E8=87=B4=E9=80=89=E9=94=99?= =?UTF-8?q?=E5=88=86=E5=8C=BA=E7=9A=84=E9=97=AE=E9=A2=98=EF=BC=9A=20?= =?UTF-8?q?=E9=97=AE=E9=A2=98=E5=88=86=E6=9E=90=EF=BC=9AgetHashPartitionOi?= =?UTF-8?q?d=E4=B8=AD=E8=AE=A1=E7=AE=97hash=E4=BD=BF=E7=94=A8=E7=9A=84?= =?UTF-8?q?=E6=97=B6const=E4=B8=8A=E7=9A=84collation=EF=BC=8Cconst?= =?UTF-8?q?=E5=8F=AF=E8=83=BD=E7=94=B1=E6=9D=A1=E4=BB=B6=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E5=B8=B8=E9=87=8F=E4=BC=A0=E5=85=A5=EF=BC=8Cconst=E7=9A=84coll?= =?UTF-8?q?ation=E4=B8=8E=E5=88=86=E5=8C=BA=E9=94=AE=E4=B8=8D=E5=90=8C?= =?UTF-8?q?=EF=BC=8C=E5=BA=94=E5=BD=93=E4=BD=BF=E7=94=A8=E5=88=86=E5=8C=BA?= =?UTF-8?q?=E9=94=AE=E4=B8=8A=E7=9A=84collation=20=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E6=96=B9=E6=A1=88=EF=BC=9AgetHashPartitionOid=E4=B8=AD?= =?UTF-8?q?=E6=97=A0=E6=B3=95=E6=96=B9=E4=BE=BF=E5=9C=B0=E8=8E=B7=E5=8F=96?= =?UTF-8?q?=E5=88=86=E5=8C=BA=E9=94=AE=E4=B8=8A=E7=9A=84collation=EF=BC=8C?= =?UTF-8?q?=E6=89=80=E6=9C=89=E9=9C=80=E8=A6=81=E5=88=A4=E6=96=AD=E5=88=86?= =?UTF-8?q?=E5=8C=BA=E7=9A=84const=E9=83=BD=E4=BC=9A=E8=B0=83=E7=94=A8getT?= =?UTF-8?q?argetType=E5=87=BD=E6=95=B0=EF=BC=8C=E5=9C=A8=E6=AD=A4=E5=87=BD?= =?UTF-8?q?=E6=95=B0=E4=B8=AD=E5=A4=84=E7=90=86collation=EF=BC=8C=E4=BD=BF?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E7=9A=84collation=E9=83=BD=E4=B8=8E=E5=88=86?= =?UTF-8?q?=E5=8C=BA=E9=94=AE=E7=9A=84=E7=9B=B8=E5=90=8C=E3=80=82getTarget?= =?UTF-8?q?Type=E7=9A=84src=E5=8F=82=E6=95=B0=E5=8F=AF=E8=83=BD=E6=8C=87?= =?UTF-8?q?=E5=90=91=E6=9D=A1=E4=BB=B6=E4=B8=8A=E7=9A=84const=EF=BC=8C?= =?UTF-8?q?=E4=B8=8D=E9=80=82=E5=90=88=E7=9B=B4=E6=8E=A5=E4=BF=AE=E6=94=B9?= =?UTF-8?q?=E5=85=B6const=EF=BC=8C=E7=9B=AE=E5=89=8Dcopy=E5=90=8E=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cbb/utils/partition/partitionkey.cpp | 13 +- .../cbb/utils/partition/partitionmap.cpp | 34 +-- .../db4ai/executor/algorithms/bayes/bayes.cpp | 3 +- .../optimizer/commands/tablecmds.cpp | 38 ++- .../optimizer/util/bucketpruning.cpp | 8 +- src/gausskernel/optimizer/util/pruning.cpp | 38 ++- .../process/stream/streamProducer.cpp | 12 +- src/gausskernel/runtime/executor/route.cpp | 6 +- .../storage/access/hash/hashfunc.cpp | 32 +- src/include/access/hash.h | 4 +- src/include/utils/partitionkey.h | 33 +- src/include/utils/partitionmap.h | 4 +- src/include/utils/partitionmap_gs.h | 2 +- .../regress/expected/charset_utf8mb4_b_db.out | 283 +++++++++++++++++- src/test/regress/sql/charset_utf8mb4_b_db.sql | 94 +++++- 15 files changed, 515 insertions(+), 89 deletions(-) diff --git a/src/gausskernel/cbb/utils/partition/partitionkey.cpp b/src/gausskernel/cbb/utils/partition/partitionkey.cpp index b3819c842..8e897d76e 100644 --- a/src/gausskernel/cbb/utils/partition/partitionkey.cpp +++ b/src/gausskernel/cbb/utils/partition/partitionkey.cpp @@ -276,6 +276,7 @@ List* untransformPartitionBoundary(Datum options) return result; } +/* Use value1's collation first, so the table partition key boundary must be the first parameter! */ int partitonKeyCompare(Const** value1, Const** value2, int len, bool nullEqual) { uint8 i = 0; @@ -321,7 +322,7 @@ int partitonKeyCompare(Const** value1, Const** value2, int len, bool nullEqual) break; } - constCompare(v1, v2, compare); + constCompare(v1, v2, v1->constcollid, compare); if (compare != 0) { break; } @@ -447,12 +448,12 @@ bool isPartKeyValuesInPartition(RangePartitionMap* partMap, Const** partKeyValue return greaterThanBottom && lessThanTop; } -int comparePartitionKey(RangePartitionMap* partMap, Const** values1, Const** values2, int partKeyNum) +int comparePartitionKey(RangePartitionMap* partMap, Const** partkey_value, Const** partkey_bound, int partKeyNum) { int compare = 0; incre_partmap_refcount((PartitionMap*)partMap); - partitonKeyCompareForRouting(values1, values2, (uint32)partKeyNum, compare); + partitonKeyCompareForRouting(partkey_value, partkey_bound, (uint32)partKeyNum, compare); decre_partmap_refcount((PartitionMap*)partMap); return compare; @@ -784,7 +785,7 @@ void GetPartitionOidListForRTE(RangeTblEntry *rte, RangeVar *relation) } /* function to check whether two partKey are identical */ -int ConstCompareWithNull(Const *c1, Const *c2) +int ConstCompareWithNull(Const *c1, Const *c2, Oid collation) { if (constIsNull(c1) && constIsNull(c2)) { return 0; @@ -794,7 +795,7 @@ int ConstCompareWithNull(Const *c1, Const *c2) } int compare = -1; - constCompare(c1, c2, compare); + constCompare(c1, c2, collation, compare); return compare; } @@ -813,7 +814,7 @@ int ListPartKeyCompare(PartitionKey* k1, PartitionKey* k2) } int res; for (int i = 0; i < k1->count; i++) { - res = ConstCompareWithNull(k1->values[i], k2->values[i]); + res = ConstCompareWithNull(k1->values[i], k2->values[i], k2->values[i]->constcollid); if (res != 0) { return res; } diff --git a/src/gausskernel/cbb/utils/partition/partitionmap.cpp b/src/gausskernel/cbb/utils/partition/partitionmap.cpp index df9ec6cb7..12a46013b 100755 --- a/src/gausskernel/cbb/utils/partition/partitionmap.cpp +++ b/src/gausskernel/cbb/utils/partition/partitionmap.cpp @@ -182,7 +182,7 @@ (compare) = interval_cmp_internal((arg1), (arg2)); \ } while (0) -#define constCompare_baseType(value1, value2, compare) \ +#define constCompare_baseType(value1, value2, collation, compare) \ do { \ switch ((value1)->consttype) { \ case INT2OID: \ @@ -217,26 +217,23 @@ break; \ case BPCHAROID: \ Assert((value2)->consttype == BPCHAROID); \ - Assert((value1)->constcollid == (value2)->constcollid); \ bpchar_cmp_partition(DatumGetBpCharP((value1)->constvalue), \ DatumGetBpCharP((value2)->constvalue), \ - (value1)->constcollid, \ + (collation), \ (compare)); \ break; \ case VARCHAROID: \ Assert((value2)->consttype == VARCHAROID); \ - Assert((value1)->constcollid == (value2)->constcollid); \ text_cmp_partition(DatumGetTextP((value1)->constvalue), \ DatumGetTextP((value2)->constvalue), \ - (value1)->constcollid, \ + (collation), \ (compare)); \ break; \ case TEXTOID: \ Assert((value2)->consttype == TEXTOID); \ - Assert((value1)->constcollid == (value2)->constcollid); \ text_cmp_partition(DatumGetTextP((value1)->constvalue), \ DatumGetTextP((value2)->constvalue), \ - (value1)->constcollid, \ + (collation), \ (compare)); \ break; \ case DATEOID: \ @@ -264,7 +261,7 @@ DatumGetIntervalP((value1)->constvalue), DatumGetIntervalP((value2)->constvalue), (compare)); \ break; \ default: \ - (compare) = constCompare_constType((value1), (value2)); \ + (compare) = constCompare_constType((value1), (value2), (collation)); \ break; \ } \ } while (0) @@ -275,7 +272,7 @@ * @Param[OUT] value2: right value to compare * @See also: */ -void constCompare(Const* value1, Const* value2, int& compare) +void constCompare(Const* value1, Const* value2, Oid collation, int& compare) { if (t_thrd.utils_cxt.gValueCompareContext == NULL) { /* @@ -300,9 +297,9 @@ void constCompare(Const* value1, Const* value2, int& compare) PG_TRY(); { if (value1->consttype == value2->consttype) { - constCompare_baseType(value1, value2, compare); + constCompare_baseType(value1, value2, collation, compare); } else { - compare = constCompare_constType(value1, value2); + compare = constCompare_constType(value1, value2, collation); } } PG_CATCH(); @@ -2395,8 +2392,8 @@ Oid getHashPartitionOid(PartitionMap* partMap, Const** partKeyValue, int32* part decre_partmap_refcount(partMap); return result; } - hash_value = hashValueCombination(hash_value, partKeyValue[i]->consttype, partKeyValue[i]->constvalue, false, - LOCATOR_TYPE_HASH); + hash_value = hashValueCombination(hash_value, partKeyValue[i]->consttype, partKeyValue[i]->constvalue, + false, LOCATOR_TYPE_HASH, partKeyValue[i]->constcollid); i++; } @@ -2772,7 +2769,7 @@ int partOidGetPartSequence(Relation rel, Oid partOid) * @return 0: value1==value2 1:value1>vlaue2 -1:value1", (Node*)value1, (Node*)value2, -1); gtExpr = (Expr*)transformExpr(pstate, (Node*)gtExpr, EXPR_KIND_PARTITION_EXPRESSION); - ((OpExpr*)gtExpr)->inputcollid = value1->constcollid; - assign_expr_collations(pstate, (Node*)gtExpr); + ((OpExpr*)gtExpr)->inputcollid = collation; estate = CreateExecutorState(); econtext = GetPerTupleExprContext(estate); @@ -3094,7 +3090,9 @@ Const **transformConstIntoPartkeyType(FormData_pg_attribute *attrs, int2vector * for (int i = 0; i < len; i++) { partKeyPos = partitionKey->values[i]; - if (likely(attrs[partKeyPos - 1].atttypid == boundary[i]->consttype) || boundary[i]->ismaxvalue) { + if ((likely(attrs[partKeyPos - 1].atttypid == boundary[i]->consttype) && + likely(attrs[partKeyPos - 1].attcollation == boundary[i]->constcollid)) || + boundary[i]->ismaxvalue) { continue; } diff --git a/src/gausskernel/dbmind/db4ai/executor/algorithms/bayes/bayes.cpp b/src/gausskernel/dbmind/db4ai/executor/algorithms/bayes/bayes.cpp index 456f2bbe9..ddaf33844 100644 --- a/src/gausskernel/dbmind/db4ai/executor/algorithms/bayes/bayes.cpp +++ b/src/gausskernel/dbmind/db4ai/executor/algorithms/bayes/bayes.cpp @@ -53,7 +53,8 @@ ValueInTuple create_value(Datum data, Oid type, bool isnull) { uint32_t hashvalue = 0; if (!isnull) { - hashvalue = DatumGetUInt32(compute_hash(type, data, LOCATOR_TYPE_HASH)); + /* As before, used invalid collation. B format collation is not supported yet. */ + hashvalue = DatumGetUInt32(compute_hash(type, data, LOCATOR_TYPE_HASH, InvalidOid)); } ValueInTuple val = {data, type, isnull, hashvalue}; return val; diff --git a/src/gausskernel/optimizer/commands/tablecmds.cpp b/src/gausskernel/optimizer/commands/tablecmds.cpp index 6e671fefb..c747cba5b 100755 --- a/src/gausskernel/optimizer/commands/tablecmds.cpp +++ b/src/gausskernel/optimizer/commands/tablecmds.cpp @@ -22759,6 +22759,30 @@ Node* GetTargetValue(Form_pg_attribute attrs, Const* src, bool isinterval, bool return NULL; } + /* convert source const's charset to target partkey's charset */ + if (!partkeyIsFunc && DB_IS_CMPT(B_FORMAT) && OidIsValid(attrs->attcollation)) { + assign_expr_collations(NULL, expr); + if (attrs->attcollation != exprCollation(expr)) { + int attcharset = get_valid_charset_by_collation(attrs->attcollation); + expr = coerce_to_target_charset(expr, attcharset, target_oid, target_mod, attrs->attcollation); + + Assert(expr != NULL); + if (!IsA(expr, Const)) { + expr = (Node*)evaluate_expr((Expr*)expr, target_oid, target_mod, attrs->attcollation); + } else if (attrs->attcollation != exprCollation(expr)) { + if (expr == (Node*)src) { + /* We are not sure where src comes from, avoid set src->constcollid directly. */ + expr = (Node*)copyObject((void*)src); + } + /* + * The expr is used to compute hash or compare it with the partition boundary. + * Set the correct collation to ensure the correctness of the partition pruning and routing. + */ + exprSetCollation(expr, attrs->attcollation); + } + } + } + switch (nodeTag(expr)) { /* do nothing for Const */ case T_Const: @@ -22942,7 +22966,7 @@ static void sqlcmd_check_list_partition_have_duplicate_values(List** key_values_ ListCell* c2 = NULL; for (int k = 0; k < bound_idx; ++k) { forboth (c1, key_values_array[part_idx][bound_idx], c2, key_values_array[part_idx][k]) { - if (ConstCompareWithNull((Const*)lfirst(c1), (Const*)lfirst(c2)) != 0) { + if (ConstCompareWithNull((Const*)lfirst(c1), (Const*)lfirst(c2), ((Const*)lfirst(c2))->constcollid) != 0) { break; } } @@ -22968,7 +22992,7 @@ static void sqlcmd_check_two_list_partition_values_overlapped(List** key_values_ Assert(!(con1->ismaxvalue && con2->ismaxvalue)); break; } - if (ConstCompareWithNull(con1, con2) != 0) { + if (ConstCompareWithNull(con1, con2, con2->constcollid) != 0) { break; } } @@ -23518,7 +23542,7 @@ static void CheckPartitionValueConflictForAddPartition(Relation rel, Node *partD RangePartitionMap *partMap = (RangePartitionMap *)rel->partMap; Const *curBound = (Const *)copyObject(partMap->rangeElements[partNum - 1].boundary[0]); Const *val = partDef->curStartVal; - if (!curBound->ismaxvalue && val != NULL && partitonKeyCompare(&val, &curBound, 1) != 0) { + if (!curBound->ismaxvalue && val != NULL && partitonKeyCompare(&curBound, &val, 1) != 0) { ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("start value of partition \"%s\" NOT EQUAL up-boundary of last partition.", partDef->partitionInitName ? partDef->partitionInitName : partDef->partitionName))); @@ -27885,14 +27909,14 @@ static void ATExecSplitPartition(Relation partTableRel, AlterTableCmd* cmd) // check the first dest partition boundary if (srcPartIndex != 0) { if (!partMap->rangeElements[srcPartIndex].isInterval) { - compare = comparePartitionKey(partMap, partMap->rangeElements[srcPartIndex - 1].boundary, - (Const**)lfirst(list_head(destPartBoundaryList)), partKeyNum); + compare = comparePartitionKey(partMap, (Const**)lfirst(list_head(destPartBoundaryList)), + partMap->rangeElements[srcPartIndex - 1].boundary, partKeyNum); } else { Const** partKeyValue = (Const**)lfirst(list_head(destPartBoundaryList)); RangeElement& srcPartition = partMap->rangeElements[srcPartIndex]; - compare = -ValueCmpLowBoudary(partKeyValue, &srcPartition, partMap->intervalValue); + compare = ValueCmpLowBoudary(partKeyValue, &srcPartition, partMap->intervalValue); } - if (compare >= 0) { + if (compare <= 0) { ereport(ERROR, (errcode(ERRCODE_INVALID_OPERATION), errmsg("the bound of the first resulting partition is too low"))); diff --git a/src/gausskernel/optimizer/util/bucketpruning.cpp b/src/gausskernel/optimizer/util/bucketpruning.cpp index 86319c7cc..d0afed49c 100644 --- a/src/gausskernel/optimizer/util/bucketpruning.cpp +++ b/src/gausskernel/optimizer/util/bucketpruning.cpp @@ -88,7 +88,7 @@ static BucketPruningContext* makePruningContext( PlannerInfo* root, RelOptInfo* rel, RangeTblEntry* rte, List* restrictInfo); static Expr* RestrictInfoGetExpr(List* restrictInfo); static BucketPruningResult* BucketPruningForExpr(BucketPruningContext* bpcxt, Expr* expr); -static int getConstBucketId(Const* val, int bucketmapsize); +static int getConstBucketId(Const* val, Oid collation, int bucketmapsize); static BucketPruningResult* BucketPruningForBoolExpr(BucketPruningContext* bpcxt, BoolExpr* expr); static BucketPruningResult* BucketPruningForOpExpr(BucketPruningContext* bpcxt, OpExpr* expr); static int GetExecBucketId(ExecNodes* exec_nodes, ParamListInfo params); @@ -635,7 +635,7 @@ static BucketPruningResult* BucketPruningForOpExpr(BucketPruningContext* bpcxt, } /* time for pruning */ - int id = getConstBucketId(constArg, bpcxt->rte->bucketmapsize); + int id = getConstBucketId(constArg, varArg->varcollid, bpcxt->rte->bucketmapsize); if (pg_strcasecmp(opName, "=") == 0) { return makePruningResult(id); @@ -653,7 +653,7 @@ static BucketPruningResult* BucketPruningForOpExpr(BucketPruningContext* bpcxt, * @in Const: the const val which we want the bucketid of it * @return the bucketid index of the const val */ -static int getConstBucketId(Const* val, int bucketmapsize) +static int getConstBucketId(Const* val, Oid collation, int bucketmapsize) { uint32 hashval = 0; int bucketid = 0; @@ -662,7 +662,7 @@ static int getConstBucketId(Const* val, int bucketmapsize) return 0; } - hashval = compute_hash(val->consttype, val->constvalue, LOCATOR_TYPE_HASH); + hashval = compute_hash(val->consttype, val->constvalue, LOCATOR_TYPE_HASH, collation); bucketid = compute_modulo((unsigned int)(abs((int)hashval)), bucketmapsize); diff --git a/src/gausskernel/optimizer/util/pruning.cpp b/src/gausskernel/optimizer/util/pruning.cpp index 79c9580b8..369b24519 100644 --- a/src/gausskernel/optimizer/util/pruning.cpp +++ b/src/gausskernel/optimizer/util/pruning.cpp @@ -1179,6 +1179,13 @@ static PruningResult* partitionPruningFromScalarArrayOpExpr return result; } + /* Do not pruning if collation of operator is different from collation of partkey. */ + if (((Var*)larg)->varcollid != arrayExpr->inputcollid) { + result->state = PRUNING_RESULT_FULL; + result->isPbeSinlePartition = false; + return result; + } + if (T_ArrayExpr == nodeTag(rarg)) { List* eleList = NULL; ListCell* element = NULL; @@ -1305,6 +1312,13 @@ static PruningResult* partitionPruningFromScalarArrayOpExpr(PartitionType partTy return result; } + /* Do not pruning if collation of operator is different from collation of partkey. */ + if (((Var*)larg)->varcollid != arrayExpr->inputcollid) { + result->state = PRUNING_RESULT_FULL; + result->isPbeSinlePartition = false; + return result; + } + if (T_ArrayExpr == nodeTag(rarg)) { List* eleList = NULL; ListCell* element = NULL; @@ -1540,6 +1554,13 @@ static PruningResult* recordBoundaryFromOpExpr(const OpExpr* expr, PruningContex } } + /* Do not pruning if collation of operator is different from collation of partkey. */ + if (varArg->varcollid != expr->inputcollid) { + result->state = PRUNING_RESULT_FULL; + result->isPbeSinlePartition = false; + return result; + } + /* Var's column MUST belongs to parition key columns */ partMap = (RangePartitionMap*)(GetPartitionMap(context)); @@ -1832,6 +1853,13 @@ static PruningResult* recordEqualFromOpExpr(PartitionType partType, const OpExpr } } + /* Do not pruning if collation of operator is different from collation of partkey. */ + if (varArg->varcollid != expr->inputcollid) { + result->state = PRUNING_RESULT_FULL; + result->isPbeSinlePartition = false; + return result; + } + PruningResult* res = RecordEqualFromOpExprPart(partType, context, opName, constMax, varArg, attrOffset, result, paramArg, exprPart, constArg, boundary); return res; } @@ -2388,8 +2416,8 @@ static void cleanPruningBottom(PruningContext *context, PartitionIdentifier* bot RangeElement* range = partMap->rangeElements + i; int compare = 0; - compare = partitonKeyCompare(&value, range->boundary, 1); - if (compare >= 0) { + compare = partitonKeyCompare(range->boundary, &value, 1); + if (compare <= 0) { continue; } else { break; @@ -2421,8 +2449,8 @@ static void cleanPruningTop(PruningContext *context, PartitionIdentifier* topSeq RangeElement* range = partMap->rangeElements + i; int compare = 0; - compare = partitonKeyCompare(&value, range->boundary, 1); - if (compare <= 0) { + compare = partitonKeyCompare(range->boundary, &value, 1); + if (compare >= 0) { continue; } else { break; @@ -2708,7 +2736,7 @@ static bool PartialListBoundaryMatched(ListPartElement* part, List* keyPos, Cons ListCell* keyCell = NULL; foreach(keyCell, keyPos) { int id = lfirst_int(keyCell); - if (ConstCompareWithNull(keyValue[id], bound->values[id]) != 0) { + if (ConstCompareWithNull(keyValue[id], bound->values[id], bound->values[id]->constcollid) != 0) { break; } } diff --git a/src/gausskernel/process/stream/streamProducer.cpp b/src/gausskernel/process/stream/streamProducer.cpp index 1d5d2da49..d7f476f84 100755 --- a/src/gausskernel/process/stream/streamProducer.cpp +++ b/src/gausskernel/process/stream/streamProducer.cpp @@ -1980,11 +1980,11 @@ static int GetListConsumerNodeIdx(ExecBoundary* enBoundary, Const** values, int while (maxId >= minId) { midId = ((uint)minId + (uint)maxId) >> 1; - cmp = partitonKeyCompare(values, enBoundary->eles[midId]->boundary, distLen); + cmp = partitonKeyCompare(enBoundary->eles[midId]->boundary, values, distLen); if (cmp == 0) { hit = midId; break; - } else if (cmp < 0) { + } else if (cmp > 0) { maxId = midId - 1; } else { minId = midId + 1; @@ -2009,18 +2009,18 @@ static int GetRangeConsumerNodeIdx(ExecBoundary* enBoundary, Const** values, int maxId = enBoundary->count - 1; minId = 0; hit = -1; - cmp = partitonKeyCompare(values, enBoundary->eles[maxId]->boundary, distLen); - if (cmp >= 0) { + cmp = partitonKeyCompare(enBoundary->eles[maxId]->boundary, values, distLen); + if (cmp <= 0) { hit = -1; } else { while (maxId > minId) { midId = ((uint)minId + (uint)maxId) >> 1; - cmp = partitonKeyCompare(values, enBoundary->eles[midId]->boundary, distLen); + cmp = partitonKeyCompare(enBoundary->eles[midId]->boundary, values, distLen); if (cmp == 0) { hit = midId + 1; break; - } else if (cmp > 0) { + } else if (cmp < 0) { minId = midId + 1; } else { maxId = midId; diff --git a/src/gausskernel/runtime/executor/route.cpp b/src/gausskernel/runtime/executor/route.cpp index 8a3cf9cb1..7e3b789eb 100644 --- a/src/gausskernel/runtime/executor/route.cpp +++ b/src/gausskernel/runtime/executor/route.cpp @@ -333,6 +333,7 @@ int SendRouter::CountNodeId(struct RouteMsg* attr) Oid* col_type = (Oid*)palloc0(dist_keynum * sizeof(Oid)); int4* col_typemode = (int4*)palloc0(dist_keynum * sizeof(int4)); + Oid* col_conllation = (Oid*)palloc0(dist_keynum * sizeof(Oid)); int i = 0; int t = 0; int dist_idx = list_nth_int(rel_loc_info->partAttrNum, t) - 1; @@ -341,6 +342,7 @@ int SendRouter::CountNodeId(struct RouteMsg* attr) col_type[t] = tupdesc->attrs[i].atttypid; Type typ = typeidType(col_type[t]); col_typemode[t] = ((Form_pg_type)GETSTRUCT(typ))->typtypmod; + col_conllation[t] = tupdesc->attrs[i].attcollation; ReleaseSysCache(typ); t++; if (t >= dist_keynum) @@ -358,7 +360,8 @@ int SendRouter::CountNodeId(struct RouteMsg* attr) col_type[i], GetDatumFromString(col_type[i], col_typemode[i], (char *)lfirst(cell)), false, - rel_loc_info->locatorType); + rel_loc_info->locatorType, + col_conllation[i]); i++; } if (unlikely(rel_loc_info->buckets_ptr == NULL)) { @@ -370,6 +373,7 @@ int SendRouter::CountNodeId(struct RouteMsg* attr) pfree_ext(col_type); pfree_ext(col_typemode); + pfree_ext(col_conllation); heap_close(rel, AccessShareLock); if ((node_id >= list_length(rel_loc_info->nodeList)) || node_id < 0) { diff --git a/src/gausskernel/storage/access/hash/hashfunc.cpp b/src/gausskernel/storage/access/hash/hashfunc.cpp index fd99b4f20..3be2db75d 100644 --- a/src/gausskernel/storage/access/hash/hashfunc.cpp +++ b/src/gausskernel/storage/access/hash/hashfunc.cpp @@ -609,7 +609,7 @@ Datum hash_uint32(uint32 k) /* * compute_hash() -- Generic hash function for all datatypes */ -Datum compute_hash(Oid type, Datum value, char locator) +Datum compute_hash(Oid type, Datum value, char locator, Oid collation) { uint8 tmp8; int16 tmp16; @@ -664,7 +664,7 @@ Datum compute_hash(Oid type, Datum value, char locator) case NVARCHAR2OID: case VARCHAROID: case TEXTOID: - return DirectFunctionCall1(hashtext, value); + return DirectFunctionCall1Coll(hashtext, collation, value); case OIDVECTOROID: return DirectFunctionCall1(hashoidvector, value); @@ -687,7 +687,7 @@ Datum compute_hash(Oid type, Datum value, char locator) return DirectFunctionCall1(hashint8, value); case BPCHAROID: - return DirectFunctionCall1(hashbpchar, value); + return DirectFunctionCall1Coll(hashbpchar, collation, value); case RAWOID: case BYTEAOID: @@ -731,13 +731,13 @@ Datum compute_hash(Oid type, Datum value, char locator) return (Datum)0; } -uint32 hashValueCombination(uint32 hashValue, Oid colType, Datum val, bool allIsNull, char locatorType) +uint32 hashValueCombination(uint32 hashValue, Oid colType, Datum val, bool allIsNull, char locatorType, Oid collation) { if (!allIsNull) { hashValue = (hashValue << 1) | ((hashValue & 0x80000000) ? 1 : 0); - hashValue ^= (uint32)compute_hash(colType, val, locatorType); + hashValue ^= (uint32)compute_hash(colType, val, locatorType, collation); } else { - hashValue = (uint32)compute_hash(colType, val, locatorType); + hashValue = (uint32)compute_hash(colType, val, locatorType, collation); } return hashValue; } @@ -933,11 +933,12 @@ Datum bucketbpchar(PG_FUNCTION_ARGS) Datum value = PG_GETARG_DATUM(0); int flag = PG_GETARG_INT32(1); long hashValue; + int collation = is_b_format_collation(PG_GET_COLLATION()) ? PG_GET_COLLATION() : InvalidOid; if (flag == 0) { /* hash */ - hashValue = (long)compute_hash(BPCHAROID, value, LOCATOR_TYPE_HASH); + hashValue = (long)compute_hash(BPCHAROID, value, LOCATOR_TYPE_HASH, collation); } else { - hashValue = (long)compute_hash(BPCHAROID, value, LOCATOR_TYPE_MODULO); + hashValue = (long)compute_hash(BPCHAROID, value, LOCATOR_TYPE_MODULO, collation); } bucket = compute_modulo(abs(hashValue), BUCKETDATALEN); result = Int32GetDatum(bucket); @@ -980,11 +981,12 @@ Datum bucketvarchar(PG_FUNCTION_ARGS) Datum value = PG_GETARG_DATUM(0); int flag = PG_GETARG_INT32(1); long hashValue; + int collation = is_b_format_collation(PG_GET_COLLATION()) ? PG_GET_COLLATION() : InvalidOid; if (flag == 0) { /* hash */ - hashValue = (long)compute_hash(VARCHAROID, value, LOCATOR_TYPE_HASH); + hashValue = (long)compute_hash(VARCHAROID, value, LOCATOR_TYPE_HASH, collation); } else { - hashValue = (long)compute_hash(VARCHAROID, value, LOCATOR_TYPE_MODULO); + hashValue = (long)compute_hash(VARCHAROID, value, LOCATOR_TYPE_MODULO, collation); } bucket = compute_modulo(abs(hashValue), BUCKETDATALEN); result = Int32GetDatum(bucket); @@ -1004,11 +1006,12 @@ Datum bucketnvarchar2(PG_FUNCTION_ARGS) Datum value = PG_GETARG_DATUM(0); int flag = PG_GETARG_INT32(1); long hashValue; + int collation = is_b_format_collation(PG_GET_COLLATION()) ? PG_GET_COLLATION() : InvalidOid; if (flag == 0) { /* hash */ - hashValue = (long)compute_hash(NVARCHAR2OID, value, LOCATOR_TYPE_HASH); + hashValue = (long)compute_hash(NVARCHAR2OID, value, LOCATOR_TYPE_HASH, collation); } else { - hashValue = (long)compute_hash(NVARCHAR2OID, value, LOCATOR_TYPE_MODULO); + hashValue = (long)compute_hash(NVARCHAR2OID, value, LOCATOR_TYPE_MODULO, collation); } bucket = compute_modulo(abs(hashValue), BUCKETDATALEN); result = Int32GetDatum(bucket); @@ -1436,11 +1439,12 @@ Datum buckettext(PG_FUNCTION_ARGS) Datum value = PG_GETARG_DATUM(0); int flag = PG_GETARG_INT32(1); long hashValue; + int collation = is_b_format_collation(PG_GET_COLLATION()) ? PG_GET_COLLATION() : InvalidOid; if (flag == 0) { /* hash */ - hashValue = (long)compute_hash(TEXTOID, value, LOCATOR_TYPE_HASH); + hashValue = (long)compute_hash(TEXTOID, value, LOCATOR_TYPE_HASH, collation); } else { - hashValue = (long)compute_hash(TEXTOID, value, LOCATOR_TYPE_MODULO); + hashValue = (long)compute_hash(TEXTOID, value, LOCATOR_TYPE_MODULO, collation); } bucket = compute_modulo(abs(hashValue), BUCKETDATALEN); result = Int32GetDatum(bucket); diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 8fab8aef0..ab74d6c75 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -416,8 +416,8 @@ extern void hashbucketcleanup(Relation rel, Bucket cur_bucket, IndexBulkDeleteCallback callback, void *callback_state); #ifdef PGXC -extern Datum compute_hash(Oid type, Datum value, char locator); -extern uint32 hashValueCombination(uint32 hashValue, Oid colType, Datum val, bool allIsNull, char locatorType = 'H'); +extern Datum compute_hash(Oid type, Datum value, char locator, Oid collation = InvalidOid); +extern uint32 hashValueCombination(uint32 hashValue, Oid colType, Datum val, bool allIsNull, char locatorType = 'H', Oid collation = InvalidOid); extern char* get_compute_hash_function(Oid type, char locator); extern Datum getbucket(PG_FUNCTION_ARGS); extern Datum getbucketbycnt(PG_FUNCTION_ARGS); diff --git a/src/include/utils/partitionkey.h b/src/include/utils/partitionkey.h index 17d070dd0..c2be68ccf 100644 --- a/src/include/utils/partitionkey.h +++ b/src/include/utils/partitionkey.h @@ -64,48 +64,49 @@ extern bool GetPartitionOidForRTE(RangeTblEntry *rte, RangeVar *relation, ParseS extern bool GetSubPartitionOidForRTE(RangeTblEntry *rte, RangeVar *relation, ParseState *pstate, Relation rel); extern void GetPartitionOidListForRTE(RangeTblEntry *rte, RangeVar *relation); -#define partitonKeyCompareForRouting(value1, value2, len, compare) \ +/* the 2nd parameter must be partition boundary */ +#define partitonKeyCompareForRouting(partkey_value, partkey_bound, len, compare) \ do { \ uint32 i = 0; \ - Const *v1 = NULL; \ - Const *v2 = NULL; \ + Const *kv = NULL; \ + Const *bv = NULL; \ for (; i < (len); i++) { \ - v1 = *((value1) + i); \ - v2 = *((value2) + i); \ - if (v1 == NULL || v2 == NULL) { \ - if (v1 == NULL && v2 == NULL) { \ + kv = *((partkey_value) + i); \ + bv = *((partkey_bound) + i); \ + if (kv == NULL || bv == NULL) { \ + if (kv == NULL && bv == NULL) { \ ereport(ERROR, \ (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("NULL can not be compared with NULL"))); \ - } else if (v1 == NULL) { \ + } else if (kv == NULL) { \ compare = -1; \ } else { \ compare = 1; \ } \ break; \ } \ - if (constIsMaxValue(v1) || constIsMaxValue(v2)) { \ - if (constIsMaxValue(v1) && constIsMaxValue(v2)) { \ + if (constIsMaxValue(kv) || constIsMaxValue(bv)) { \ + if (constIsMaxValue(kv) && constIsMaxValue(bv)) { \ compare = 0; \ continue; \ - } else if (constIsMaxValue(v1)) { \ + } else if (constIsMaxValue(kv)) { \ compare = 1; \ } else { \ compare = -1; \ } \ break; \ } \ - if (v1->constisnull || v2->constisnull) { \ - if (v1->constisnull && v2->constisnull) { \ + if (kv->constisnull || bv->constisnull) { \ + if (kv->constisnull && bv->constisnull) { \ ereport(ERROR, (errcode(ERRCODE_UNEXPECTED_NULL_VALUE), \ errmsg("null value can not be compared with null value."))); \ - } else if (v1->constisnull) { \ + } else if (kv->constisnull) { \ compare = 1; \ } else { \ compare = -1; \ } \ break; \ } \ - constCompare(v1, v2, compare); \ + constCompare(kv, bv, bv->constcollid, compare); \ if ((compare) != 0) { \ break; \ } \ @@ -117,5 +118,5 @@ extern void GetPartitionOidListForRTE(RangeTblEntry *rte, RangeVar *relation); #define constIsNull(x) ((x)->constisnull) #define constIsMaxValue(x) ((x)->ismaxvalue) -int ConstCompareWithNull(Const *c1, Const *c2); +int ConstCompareWithNull(Const *c1, Const *c2, Oid collation); int ListPartKeyCompare(PartitionKey* k1, PartitionKey* k2); \ No newline at end of file diff --git a/src/include/utils/partitionmap.h b/src/include/utils/partitionmap.h index 74148b0ac..d1504e97b 100644 --- a/src/include/utils/partitionmap.h +++ b/src/include/utils/partitionmap.h @@ -136,11 +136,11 @@ extern int GetSubPartitionNumber(Relation rel); extern bool targetListHasPartitionKey(List* targetList, Oid partitiondtableid); -extern int constCompare_constType(Const* value1, Const* value2); +extern int constCompare_constType(Const* value1, Const* value2, Oid collation); extern bool partitionHasToast(Oid partOid); -extern void constCompare(Const* value1, Const* value2, int& compare); +extern void constCompare(Const* value1, Const* value2, Oid collation, int& compare); extern struct ListPartElement* CopyListElements(ListPartElement* src, int elementNum); extern struct HashPartElement* CopyHashElements(HashPartElement* src, int elementNum, int partkeyNum); diff --git a/src/include/utils/partitionmap_gs.h b/src/include/utils/partitionmap_gs.h index e9f8db4c1..a76fb4156 100755 --- a/src/include/utils/partitionmap_gs.h +++ b/src/include/utils/partitionmap_gs.h @@ -508,7 +508,7 @@ extern bool EqualPartitonMap(const PartitionMap* partMap1, const PartitionMap* p bool isPartKeyValuesInPartition(RangePartitionMap* partMap, Const** partKeyValues, int partkeyColumnNum, int partSeq); -extern int comparePartitionKey(RangePartitionMap* partMap, Const** values1, Const** values2, int partKeyNum); +extern int comparePartitionKey(RangePartitionMap* partMap, Const** partkey_value, Const** partkey_bound, int partKeyNum); extern int lookupHBucketid(oidvector *buckets, int low, int2 bucket_id); diff --git a/src/test/regress/expected/charset_utf8mb4_b_db.out b/src/test/regress/expected/charset_utf8mb4_b_db.out index 8fdc86aae..736fe0c22 100644 --- a/src/test/regress/expected/charset_utf8mb4_b_db.out +++ b/src/test/regress/expected/charset_utf8mb4_b_db.out @@ -4025,8 +4025,140 @@ CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_c partition p3 values less than(MAXVALUE) ); -- ERROR ERROR: partition bound of partition "p2" is too low +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY RANGE(part) ( + partition p1 values less than('楂樻柉DB'), + partition p2 values less than(_gbk'高斯db'), + partition p3 values less than(MAXVALUE) +); -- ERROR +ERROR: partition bound of partition "p2" is too low +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY LIST(part) ( + partition p1 values('高斯DB'), + partition p2 values('高斯db') +); -- ERROR +ERROR: list partition p1 and p2 has overlapped value +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY RANGE(part) ( + PARTITION pass START('高斯DB') END('高斯db'), + PARTITION excellent START('高斯db') END(MAXVALUE) +); -- unsupported +ERROR: datatype of column "part" is unsupported for partition key in start/end clause. +HINT: Valid datatypes are: smallint, int, bigint, float4/real, float8/double, numeric, date and timestamp [with time zone]. -- -- -- utf8mb4 -CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_bin, a int) +CREATE TABLE t_multi_charset_partkey (part text collate utf8mb4_bin, a int) + PARTITION BY HASH(part) ( + partition p1, + partition p2, + partition p3, + partition p4 +); +-- -- -- insert +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯DB', 1); +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯db', 2); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯DB1', 3); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯db1', 4); +-- -- -- select +SELECT * FROM t_multi_charset_partkey PARTITION(p1) order by 1,2; + part | a +----------+--- + 楂樻柉DB | 1 + 楂樻柉db | 2 +(2 rows) + +SELECT * FROM t_multi_charset_partkey PARTITION(p2) order by 1,2; + part | a +---------+--- + 高斯db1 | 4 +(1 row) + +SELECT * FROM t_multi_charset_partkey PARTITION(p3) order by 1,2; + part | a +------+--- +(0 rows) + +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_gbk'高斯db') order by 1,2; + part | a +----------+--- + 楂樻柉DB | 1 + 楂樻柉db | 2 +(2 rows) + +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_utf8mb4'高斯DB1') order by 1,2; + part | a +---------+--- + 高斯DB1 | 3 +(1 row) + +-- -- -- partition pruning +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; + QUERY PLAN +--------------------------------------------------------------- + Sort + Sort Key: a + -> Partitioned Seq Scan on t_multi_charset_partkey + Filter: (part = '楂樻柉DB'::text COLLATE utf8mb4_bin) + Selected Partitions: 1 +(5 rows) + +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; + part | a +----------+--- + 楂樻柉DB | 1 +(1 row) + +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db1' order by 1,2; + QUERY PLAN +------------------------------------------------------- + Sort + Sort Key: a + -> Partitioned Seq Scan on t_multi_charset_partkey + Filter: (part = '高斯db1'::text) + Selected Partitions: 2 +(5 rows) + +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db1' order by 1,2; + part | a +---------+--- + 高斯db1 | 4 +(1 row) + +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; -- ALL PARTS + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: part COLLATE utf8mb4_bin, a + -> Partition Iterator + Iterations: 4 + -> Partitioned Seq Scan on t_multi_charset_partkey + Filter: ((part)::text = '楂樻柉DB'::text) + Selected Partitions: 1..4 +(7 rows) + +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; + part | a +----------+--- + 楂樻柉DB | 1 + 楂樻柉db | 2 +(2 rows) + +-- -- -- partiton ddl +ALTER TABLE t_multi_charset_partkey SPLIT PARTITION p1 AT ( '高斯DB' ) INTO ( PARTITION p1, PARTITION p4); -- not support +ERROR: can not split LIST/HASH partition table +ALTER TABLE t_multi_charset_partkey RENAME PARTITION FOR(_gbk'高斯db') TO newp1; +SELECT * FROM t_multi_charset_partkey PARTITION(newp1) order by 1,2; + part | a +----------+--- + 楂樻柉DB | 1 + 楂樻柉db | 2 +(2 rows) + +DROP TABLE t_multi_charset_partkey; +-- -- -- utf8mb4 +CREATE TABLE t_multi_charset_partkey (part text collate utf8mb4_bin, a int) PARTITION BY RANGE(part) ( partition p1 values less than('楂樻柉DB'), partition p2 values less than('楂樻柉db'), @@ -4069,15 +4201,27 @@ SELECT * FROM t_multi_charset_partkey PARTITION(p5) order by 1,2; 高斯db | 4 (1 row) +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_gbk'高斯db') order by 1,2; + part | a +----------+--- + 楂樻柉db | 2 +(1 row) + +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_utf8mb4'高斯DB') order by 1,2; + part | a +--------+--- + 高斯DB | 3 +(1 row) + -- -- -- partition pruning EXPLAIN (costs off) SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; - QUERY PLAN ------------------------------------------------------------------------ + QUERY PLAN +--------------------------------------------------------------- Sort Sort Key: a -> Partitioned Seq Scan on t_multi_charset_partkey - Filter: ((part)::text = '楂樻柉DB'::text COLLATE utf8mb4_bin) + Filter: (part = '楂樻柉DB'::text COLLATE utf8mb4_bin) Selected Partitions: 2 (5 rows) @@ -4094,7 +4238,7 @@ SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db' order by 1,2 Sort Sort Key: a -> Partitioned Seq Scan on t_multi_charset_partkey - Filter: ((part)::text = '高斯db'::text) + Filter: (part = '高斯db'::text) Selected Partitions: 5 (5 rows) @@ -4124,6 +4268,135 @@ SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chin 楂樻柉db | 2 (2 rows) +-- -- -- partiton ddl +ALTER TABLE t_multi_charset_partkey SPLIT PARTITION FOR(_gbk'高斯DB') AT (_gbk'高斯DB1 ') INTO (PARTITION p2_1, PARTITION p2_2); +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯DB1', 1); +SELECT * FROM t_multi_charset_partkey PARTITION(p2_1) order by 1,2; + part | a +----------+--- + 楂樻柉DB | 1 +(1 row) + +SELECT * FROM t_multi_charset_partkey PARTITION(p2_2) order by 1,2; + part | a +-----------+--- + 楂樻柉DB1 | 1 +(1 row) + +ALTER TABLE t_multi_charset_partkey RENAME PARTITION FOR(_gbk'高斯db') TO p3_1; +SELECT * FROM t_multi_charset_partkey PARTITION(p3_1) order by 1,2; + part | a +----------+--- + 楂樻柉db | 2 +(1 row) + +DROP TABLE t_multi_charset_partkey; +-- -- -- utf8mb4 +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_unicode_ci, part2 varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY LIST COLUMNS(part, part2) ( + partition p1 values in(('楂樻柉DB', '楂樻柉db')), + partition p2 values in(('高斯db', '高斯DB')) +); +-- -- -- insert +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯DB', _gbk'高斯DB', 1); +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯db', _gbk'高斯db', 2); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯DB', _utf8mb4'高斯DB', 3); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯db', _utf8mb4'高斯db', 4); +-- -- -- select +SELECT * FROM t_multi_charset_partkey PARTITION(p1) order by 1,2; + part | part2 | a +----------+----------+--- + 楂樻柉DB | 楂樻柉DB | 1 + 楂樻柉db | 楂樻柉db | 2 +(2 rows) + +SELECT * FROM t_multi_charset_partkey PARTITION(p2) order by 1,2; + part | part2 | a +--------+--------+--- + 高斯DB | 高斯DB | 3 + 高斯db | 高斯db | 4 +(2 rows) + +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_gbk'高斯DB', _gbk'高斯db') order by 1,2; + part | part2 | a +----------+----------+--- + 楂樻柉DB | 楂樻柉DB | 1 + 楂樻柉db | 楂樻柉db | 2 +(2 rows) + +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_utf8mb4'高斯db', _utf8mb4'高斯db') order by 1,2; + part | part2 | a +--------+--------+--- + 高斯DB | 高斯DB | 3 + 高斯db | 高斯db | 4 +(2 rows) + +-- -- -- partition pruning +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; + QUERY PLAN +------------------------------------------------------------------------------ + Sort + Sort Key: part2 COLLATE utf8mb4_general_ci + -> Partitioned Seq Scan on t_multi_charset_partkey + Filter: ((part)::text = '楂樻柉DB'::text COLLATE utf8mb4_unicode_ci) + Selected Partitions: 1 +(5 rows) + +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; + part | part2 | a +----------+----------+--- + 楂樻柉DB | 楂樻柉DB | 1 + 楂樻柉db | 楂樻柉db | 2 +(2 rows) + +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db' order by 1,2; + QUERY PLAN +------------------------------------------------------- + Sort + Sort Key: part2 COLLATE utf8mb4_general_ci + -> Partitioned Seq Scan on t_multi_charset_partkey + Filter: ((part)::text = '高斯db'::text) + Selected Partitions: 2 +(5 rows) + +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db' order by 1,2; + part | part2 | a +--------+--------+--- + 高斯DB | 高斯DB | 3 + 高斯db | 高斯db | 4 +(2 rows) + +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; -- ALL PARTS + QUERY PLAN +------------------------------------------------------------------------------- + Sort + Sort Key: part COLLATE utf8mb4_unicode_ci, part2 COLLATE utf8mb4_general_ci + -> Partition Iterator + Iterations: 2 + -> Partitioned Seq Scan on t_multi_charset_partkey + Filter: ((part)::text = '楂樻柉DB'::text) + Selected Partitions: 1..2 +(7 rows) + +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; + part | part2 | a +----------+----------+--- + 楂樻柉DB | 楂樻柉DB | 1 + 楂樻柉db | 楂樻柉db | 2 +(2 rows) + +-- -- -- partiton ddl +ALTER TABLE t_multi_charset_partkey RENAME PARTITION FOR(_gbk'高斯DB', _gbk'高斯db') TO p1_1; +SELECT * FROM t_multi_charset_partkey PARTITION(p1_1) order by 1,2; + part | part2 | a +----------+----------+--- + 楂樻柉DB | 楂樻柉DB | 1 + 楂樻柉db | 楂樻柉db | 2 +(2 rows) + DROP TABLE t_multi_charset_partkey; -- -- -- gbk CREATE TABLE t_multi_charset_partkey (part varchar(32) CHARACTER set gbk collate gbk_bin, a int) diff --git a/src/test/regress/sql/charset_utf8mb4_b_db.sql b/src/test/regress/sql/charset_utf8mb4_b_db.sql index 62bc47084..ab1fcb19d 100644 --- a/src/test/regress/sql/charset_utf8mb4_b_db.sql +++ b/src/test/regress/sql/charset_utf8mb4_b_db.sql @@ -1141,9 +1141,61 @@ CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_c partition p2 values less than('高斯db'), partition p3 values less than(MAXVALUE) ); -- ERROR +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY RANGE(part) ( + partition p1 values less than('楂樻柉DB'), + partition p2 values less than(_gbk'高斯db'), + partition p3 values less than(MAXVALUE) +); -- ERROR +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY LIST(part) ( + partition p1 values('高斯DB'), + partition p2 values('高斯db') +); -- ERROR +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY RANGE(part) ( + PARTITION pass START('高斯DB') END('高斯db'), + PARTITION excellent START('高斯db') END(MAXVALUE) +); -- unsupported + -- -- -- utf8mb4 -CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_bin, a int) +CREATE TABLE t_multi_charset_partkey (part text collate utf8mb4_bin, a int) + PARTITION BY HASH(part) ( + partition p1, + partition p2, + partition p3, + partition p4 +); +-- -- -- insert +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯DB', 1); +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯db', 2); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯DB1', 3); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯db1', 4); +-- -- -- select +SELECT * FROM t_multi_charset_partkey PARTITION(p1) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION(p2) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION(p3) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_gbk'高斯db') order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_utf8mb4'高斯DB1') order by 1,2; +-- -- -- partition pruning +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db1' order by 1,2; +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db1' order by 1,2; +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; -- ALL PARTS +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; +-- -- -- partiton ddl +ALTER TABLE t_multi_charset_partkey SPLIT PARTITION p1 AT ( '高斯DB' ) INTO ( PARTITION p1, PARTITION p4); -- not support +ALTER TABLE t_multi_charset_partkey RENAME PARTITION FOR(_gbk'高斯db') TO newp1; +SELECT * FROM t_multi_charset_partkey PARTITION(newp1) order by 1,2; +DROP TABLE t_multi_charset_partkey; + +-- -- -- utf8mb4 +CREATE TABLE t_multi_charset_partkey (part text collate utf8mb4_bin, a int) PARTITION BY RANGE(part) ( partition p1 values less than('楂樻柉DB'), partition p2 values less than('楂樻柉db'), @@ -1162,6 +1214,8 @@ SELECT * FROM t_multi_charset_partkey PARTITION(p2) order by 1,2; SELECT * FROM t_multi_charset_partkey PARTITION(p3) order by 1,2; SELECT * FROM t_multi_charset_partkey PARTITION(p4) order by 1,2; SELECT * FROM t_multi_charset_partkey PARTITION(p5) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_gbk'高斯db') order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_utf8mb4'高斯DB') order by 1,2; -- -- -- partition pruning EXPLAIN (costs off) SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; @@ -1172,6 +1226,44 @@ SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db' order by 1,2 EXPLAIN (costs off) SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; -- ALL PARTS SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; +-- -- -- partiton ddl +ALTER TABLE t_multi_charset_partkey SPLIT PARTITION FOR(_gbk'高斯DB') AT (_gbk'高斯DB1 ') INTO (PARTITION p2_1, PARTITION p2_2); +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯DB1', 1); +SELECT * FROM t_multi_charset_partkey PARTITION(p2_1) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION(p2_2) order by 1,2; +ALTER TABLE t_multi_charset_partkey RENAME PARTITION FOR(_gbk'高斯db') TO p3_1; +SELECT * FROM t_multi_charset_partkey PARTITION(p3_1) order by 1,2; +DROP TABLE t_multi_charset_partkey; + +-- -- -- utf8mb4 +CREATE TABLE t_multi_charset_partkey (part varchar(32) collate utf8mb4_unicode_ci, part2 varchar(32) collate utf8mb4_general_ci, a int) + PARTITION BY LIST COLUMNS(part, part2) ( + partition p1 values in(('楂樻柉DB', '楂樻柉db')), + partition p2 values in(('高斯db', '高斯DB')) +); +-- -- -- insert +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯DB', _gbk'高斯DB', 1); +INSERT INTO t_multi_charset_partkey VALUES(_gbk'高斯db', _gbk'高斯db', 2); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯DB', _utf8mb4'高斯DB', 3); +INSERT INTO t_multi_charset_partkey VALUES(_utf8mb4'高斯db', _utf8mb4'高斯db', 4); +-- -- -- select +SELECT * FROM t_multi_charset_partkey PARTITION(p1) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION(p2) order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_gbk'高斯DB', _gbk'高斯db') order by 1,2; +SELECT * FROM t_multi_charset_partkey PARTITION FOR(_utf8mb4'高斯db', _utf8mb4'高斯db') order by 1,2; +-- -- -- partition pruning +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' order by 1,2; +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db' order by 1,2; +SELECT * FROM t_multi_charset_partkey WHERE part=_utf8mb4'高斯db' order by 1,2; +EXPLAIN (costs off) +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; -- ALL PARTS +SELECT * FROM t_multi_charset_partkey WHERE part=_gbk'高斯DB' collate gbk_chinese_ci order by 1,2; +-- -- -- partiton ddl +ALTER TABLE t_multi_charset_partkey RENAME PARTITION FOR(_gbk'高斯DB', _gbk'高斯db') TO p1_1; +SELECT * FROM t_multi_charset_partkey PARTITION(p1_1) order by 1,2; DROP TABLE t_multi_charset_partkey; -- -- -- gbk