[fix](Nereids) string literal coercion of in predicate (#35337)

pick from master #35200

Description:
   The sql execute much slow when the literal value with string format in `in predicate`; and the real data is integral type。
```
mysql> set enable_nereids_planner = false;
Query OK, 0 rows affected (0.03 sec)

mysql> select id,sum(clicks) from a_table where id in ('787934713', '306960695') group by id limit 10;
+------------+---------------+
| id | sum(`clicks`) |
+------------+---------------+
|  787934713 |          2838 |
|  306960695 |           339 |
+------------+---------------+
2 rows in set (1.81 sec)

mysql> set enable_nereids_planner = true;
Query OK, 0 rows affected (0.02 sec)

mysql> select id,sum(clicks) from a_table where id in ('787934713', '306960695') group by id limit 10;
+------------+-------------+
| id | sum(clicks) |
+------------+-------------+
|  787934713 |        2838 |
|  306960695 |         339 |
+------------+-------------+
2 rows in set (28.14 sec)
```

Reason:
In legacy planner, the string literal with convert to integral value, but in the nereids planner do not do this convert and with do string matching in BE。

Solved:
do process string literal with numeric in `in predicate` like in `comparison predicate`;
test table:
```
create table a_table(
    k1 BIGINT NOT NULL,
    k2 VARCHAR(100) NOT NULL,
    v1 INT SUM NULL DEFAULT "0"
) ENGINE=OLAP
AGGREGATE KEY(k1,k2)
distributed BY hash(k1) buckets 2
properties("replication_num" = "1");
insert into a_table values (10, 'name1', 10),(20, 'name2', 10);
explain plan select * from a_table where k1 in ('10', '20001');
```
before optimize:
```
+--------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner)                                                                                                      |
+--------------------------------------------------------------------------------------------------------------------------------------+
| ========== PARSED PLAN (time: 1ms) ==========                                                                                        |
| UnboundResultSink[4] (  )                                                                                                            |
| +--LogicalProject[3] ( distinct=false, projects=[*], excepts=[] )                                                                    |
|    +--LogicalFilter[2] ( predicates='k1 IN ('10001', '20001') )                                                                      |
|       +--LogicalCheckPolicy (  )                                                                                                     |
|          +--UnboundRelation ( id=RelationId#0, nameParts=a_table )                                                                   |
|                                                                                                                                      |
| ========== ANALYZED PLAN (time: 2ms) ==========                                                                                      |
| LogicalResultSink[15] ( outputExprs=[k1#0, k2#1, v1#2] )                                                                             |
| +--LogicalProject[13] ( distinct=false, projects=[k1#0, k2#1, v1#2], excepts=[] )                                                    |
|    +--LogicalFilter[11] ( predicates=cast(k1#0 as TEXT) IN ('10001', '20001') )                                                      |
|       +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=<index_not_selected>, selectedIndexId=12003, preAgg=UNSET )      |
|                                                                                                                                      |
| ========== REWRITTEN PLAN (time: 6ms) ==========                                                                                     |
| LogicalResultSink[45] ( outputExprs=[k1#0, k2#1, v1#2] )                                                                             |
| +--LogicalFilter[43] ( predicates=cast(k1#0 as TEXT) IN ('10001', '20001') )                                                         |
|    +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=a_table, selectedIndexId=12003, preAgg=OFF, No aggregate on scan. ) |
|                                                                                                                                      |
| ========== OPTIMIZED PLAN (time: 6ms) ==========                                                                                     |
| PhysicalResultSink[90] ( outputExprs=[k1#0, k2#1, v1#2] )                                                                            |
| +--PhysicalDistribute[87]@1 ( stats=0.33, distributionSpec=DistributionSpecGather )                                                  |
|    +--PhysicalFilter[84]@1 ( stats=0.33, predicates=cast(k1#0 as TEXT) IN ('10001', '20001') )                                       |
|       +--PhysicalOlapScan[a_table]@0 ( stats=1 )                                                                                     |
+--------------------------------------------------------------------------------------------------------------------------------------+
```
after optimize:
```
+--------------------------------------------------------------------------------------------------------------------------------------+
| Explain String(Nereids Planner)                                                                                                      |
+--------------------------------------------------------------------------------------------------------------------------------------+
| ========== PARSED PLAN (time: 15ms) ==========                                                                                       |
| UnboundResultSink[4] (  )                                                                                                            |
| +--LogicalProject[3] ( distinct=false, projects=[*], excepts=[] )                                                                    |
|    +--LogicalFilter[2] ( predicates='k1 IN ('10001', '20001') )                                                                      |
|       +--LogicalCheckPolicy (  )                                                                                                     |
|          +--UnboundRelation ( id=RelationId#0, nameParts=a_table )                                                                   |
|                                                                                                                                      |
| ========== ANALYZED PLAN (time: 11ms) ==========                                                                                     |
| LogicalResultSink[15] ( outputExprs=[k1#0, k2#1, v1#2] )                                                                             |
| +--LogicalProject[13] ( distinct=false, projects=[k1#0, k2#1, v1#2], excepts=[] )                                                    |
|    +--LogicalFilter[11] ( predicates=k1#0 IN (10001, 20001) )                                                                        |
|       +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=<index_not_selected>, selectedIndexId=12003, preAgg=UNSET )      |
|                                                                                                                                      |
| ========== REWRITTEN PLAN (time: 12ms) ==========                                                                                    |
| LogicalResultSink[45] ( outputExprs=[k1#0, k2#1, v1#2] )                                                                             |
| +--LogicalFilter[43] ( predicates=k1#0 IN (10001, 20001) )                                                                           |
|    +--LogicalOlapScan ( qualified=internal.db.a_table, indexName=a_table, selectedIndexId=12003, preAgg=OFF, No aggregate on scan. ) |
|                                                                                                                                      |
| ========== OPTIMIZED PLAN (time: 4ms) ==========                                                                                     |
| PhysicalResultSink[90] ( outputExprs=[k1#0, k2#1, v1#2] )                                                                            |
| +--PhysicalDistribute[87]@1 ( stats=0, distributionSpec=DistributionSpecGather )                                                     |
|    +--PhysicalFilter[84]@1 ( stats=0, predicates=k1#0 IN (10001, 20001) )                                                            |
|       +--PhysicalOlapScan[a_table]@0 ( stats=2 )                                                                                     |
+--------------------------------------------------------------------------------------------------------------------------------------+
```
This commit is contained in:
wangqt
2024-05-24 14:26:52 +08:00
committed by GitHub
parent bb3a0fd30e
commit 0b90e37227
2 changed files with 51 additions and 6 deletions

View File

@ -118,7 +118,9 @@ import org.apache.logging.log4j.Logger;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
@ -975,8 +977,28 @@ public class TypeCoercionUtils {
}
return inPredicate;
}
// process string literal with numeric
boolean hitString = false;
List<Expression> newOptions = new ArrayList<>(inPredicate.getOptions());
if (!(inPredicate.getCompareExpr().getDataType().isStringLikeType())) {
ListIterator<Expression> iterator = newOptions.listIterator();
while (iterator.hasNext()) {
Expression origOption = iterator.next();
if (origOption instanceof Literal && ((Literal) origOption).isStringLikeLiteral()) {
Optional<Expression> option = TypeCoercionUtils.characterLiteralTypeCoercion(
((Literal) origOption).getStringValue(), inPredicate.getCompareExpr().getDataType());
if (option.isPresent()) {
iterator.set(option.get());
hitString = true;
}
}
}
}
final InPredicate fmtInPredicate =
hitString ? new InPredicate(inPredicate.getCompareExpr(), newOptions) : inPredicate;
Optional<DataType> optionalCommonType = TypeCoercionUtils.findWiderCommonTypeForComparison(
inPredicate.children()
fmtInPredicate.children()
.stream()
.map(Expression::getDataType).collect(Collectors.toList()),
true);
@ -999,18 +1021,18 @@ public class TypeCoercionUtils {
if (optionalCommonType.isPresent()) {
optionalCommonType = Optional.of(downgradeDecimalAndDateLikeType(
optionalCommonType.get(),
inPredicate.getCompareExpr(),
inPredicate.getOptions().toArray(new Expression[0])));
fmtInPredicate.getCompareExpr(),
fmtInPredicate.getOptions().toArray(new Expression[0])));
}
return optionalCommonType
.map(commonType -> {
List<Expression> newChildren = inPredicate.children().stream()
List<Expression> newChildren = fmtInPredicate.children().stream()
.map(e -> TypeCoercionUtils.castIfNotSameType(e, commonType))
.collect(Collectors.toList());
return inPredicate.withChildren(newChildren);
return fmtInPredicate.withChildren(newChildren);
})
.orElse(inPredicate);
.orElse(fmtInPredicate);
}
/**