[feature](inverted index) match_phrase_prefix feature added (#27404)

select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'xxx';
This commit is contained in:
zzzxl
2023-12-05 20:15:13 +08:00
committed by GitHub
parent ffa4ea66d5
commit 05adbfdb3d
26 changed files with 534 additions and 8 deletions

View File

@ -344,6 +344,7 @@ MATCH_ELEMENT_GT: 'ELEMENT_GT';
MATCH_ELEMENT_LE: 'ELEMENT_LE';
MATCH_ELEMENT_LT: 'ELEMENT_LT';
MATCH_PHRASE: 'MATCH_PHRASE';
MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
MATERIALIZED: 'MATERIALIZED';
MAX: 'MAX';
MAXVALUE: 'MAXVALUE';

View File

@ -593,7 +593,7 @@ rowConstructorItem
predicate
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
| NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
| NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE) pattern=valueExpression
| NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE | MATCH_PHRASE_PREFIX) pattern=valueExpression
| NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
| NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
| IS NOT? kind=NULL

View File

@ -478,6 +478,7 @@ terminal String
KW_MATCH_ANY,
KW_MATCH_ALL,
KW_MATCH_PHRASE,
KW_MATCH_PHRASE_PREFIX,
KW_MATCH_ELEMENT_EQ,
KW_MATCH_ELEMENT_LT,
KW_MATCH_ELEMENT_GT,
@ -975,7 +976,7 @@ precedence left KW_AND;
precedence left KW_NOT, NOT;
precedence left KW_BETWEEN, KW_IN, KW_IS, KW_EXISTS;
precedence left KW_LIKE, KW_REGEXP;
precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE;
precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH_PHRASE_PREFIX, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE;
precedence left EQUAL, LESSTHAN, GREATERTHAN;
precedence left ADD, SUBTRACT;
precedence left AT, STAR, DIVIDE, MOD, KW_DIV;
@ -7022,6 +7023,8 @@ match_predicate ::=
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ALL, e1, e2); :}
| expr:e1 KW_MATCH_PHRASE expr:e2
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE, e1, e2); :}
| expr:e1 KW_MATCH_PHRASE_PREFIX expr:e2
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE_PREFIX, e1, e2); :}
| expr:e1 KW_MATCH_ELEMENT_EQ expr:e2
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_EQ, e1, e2); :}
| expr:e1 KW_MATCH_ELEMENT_LT expr:e2

View File

@ -50,6 +50,7 @@ public class MatchPredicate extends Predicate {
MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY),
MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL),
MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE),
MATCH_PHRASE_PREFIX("MATCH_PHRASE_PREFIX", "match_phrase_prefix", TExprOpcode.MATCH_PHRASE_PREFIX),
MATCH_ELEMENT_EQ("MATCH_ELEMENT_EQ", "match_element_eq", TExprOpcode.MATCH_ELEMENT_EQ),
MATCH_ELEMENT_LT("MATCH_ELEMENT_LT", "match_element_lt", TExprOpcode.MATCH_ELEMENT_LT),
MATCH_ELEMENT_GT("MATCH_ELEMENT_GT", "match_element_gt", TExprOpcode.MATCH_ELEMENT_GT),
@ -147,6 +148,16 @@ public class MatchPredicate extends Predicate {
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_PHRASE_PREFIX.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(t, t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_PHRASE_PREFIX.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
}
}

View File

@ -226,6 +226,7 @@ import org.apache.doris.nereids.trees.expressions.ListQuery;
import org.apache.doris.nereids.trees.expressions.MatchAll;
import org.apache.doris.nereids.trees.expressions.MatchAny;
import org.apache.doris.nereids.trees.expressions.MatchPhrase;
import org.apache.doris.nereids.trees.expressions.MatchPhrasePrefix;
import org.apache.doris.nereids.trees.expressions.Mod;
import org.apache.doris.nereids.trees.expressions.Multiply;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
@ -2821,6 +2822,12 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
getExpression(ctx.pattern)
);
break;
case DorisParser.MATCH_PHRASE_PREFIX:
outExpression = new MatchPhrasePrefix(
valueExpression,
getExpression(ctx.pattern)
);
break;
default:
throw new ParseException("Unsupported predicate type: " + ctx.kind.getText(), ctx);
}

View File

@ -49,6 +49,8 @@ public abstract class Match extends BinaryOperator implements PropagateNullable
return Operator.MATCH_ALL;
case "MATCH_PHRASE":
return Operator.MATCH_PHRASE;
case "MATCH_PHRASE_PREFIX":
return Operator.MATCH_PHRASE_PREFIX;
default:
throw new AnalysisException("UnSupported type for match: " + symbol);
}

View File

@ -0,0 +1,49 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.expressions;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.util.List;
/**
 * Match expression for the {@code MATCH_PHRASE_PREFIX} operator,
 * e.g. {@code a MATCH_PHRASE_PREFIX 'hello w'}.
 *
 * <p>The node always carries exactly two children: the left operand and the
 * right (pattern) operand.
 */
public class MatchPhrasePrefix extends Match {

    /** Operator symbol handed to the {@link Match} base class. */
    private static final String SYMBOL = "MATCH_PHRASE_PREFIX";

    /**
     * Builds the expression from its two operands.
     *
     * @param left  expression on the left-hand side of the operator
     * @param right expression on the right-hand side of the operator
     */
    public MatchPhrasePrefix(Expression left, Expression right) {
        this(ImmutableList.of(left, right));
    }

    /** Internal constructor used when the child list is already assembled. */
    private MatchPhrasePrefix(List<Expression> operands) {
        super(operands, SYMBOL);
    }

    /**
     * Returns a new node of the same kind over the given children.
     *
     * @param children replacement children; must contain exactly two expressions
     * @throws IllegalArgumentException if {@code children} does not have size 2
     */
    @Override
    public MatchPhrasePrefix withChildren(List<Expression> children) {
        final boolean isBinary = children.size() == 2;
        Preconditions.checkArgument(isBinary);
        return new MatchPhrasePrefix(children);
    }

    /** Dispatches to {@code visitMatchPhrasePrefix} on the supplied visitor. */
    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        return visitor.visitMatchPhrasePrefix(this, context);
    }
}

View File

@ -59,6 +59,7 @@ import org.apache.doris.nereids.trees.expressions.Match;
import org.apache.doris.nereids.trees.expressions.MatchAll;
import org.apache.doris.nereids.trees.expressions.MatchAny;
import org.apache.doris.nereids.trees.expressions.MatchPhrase;
import org.apache.doris.nereids.trees.expressions.MatchPhrasePrefix;
import org.apache.doris.nereids.trees.expressions.Mod;
import org.apache.doris.nereids.trees.expressions.Multiply;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
@ -494,6 +495,10 @@ public abstract class ExpressionVisitor<R, C>
return visitMatch(matchPhrase, context);
}
/**
* Visits a {@link MatchPhrasePrefix} expression.
*
* <p>The default implementation delegates to {@code visitMatch} with the same
* expression and context; override it to handle this operator separately.
*
* @param matchPhrasePrefix the expression being visited
* @param context visitor context, passed through unchanged
* @return whatever {@code visitMatch} returns for this expression
*/
public R visitMatchPhrasePrefix(MatchPhrasePrefix matchPhrasePrefix, C context) {
return visitMatch(matchPhrasePrefix, context);
}
/* ********************************************************************************************
* Unbound expressions
* ********************************************************************************************/

View File

@ -423,6 +423,7 @@ public class SessionVariable implements Serializable, Writable {
public static final String ENABLE_UNIQUE_KEY_PARTIAL_UPDATE = "enable_unique_key_partial_update";
public static final String INVERTED_INDEX_CONJUNCTION_OPT_THRESHOLD = "inverted_index_conjunction_opt_threshold";
public static final String INVERTED_INDEX_MAX_EXPANSIONS = "inverted_index_max_expansions";
public static final String AUTO_ANALYZE_START_TIME = "auto_analyze_start_time";
@ -1316,6 +1317,12 @@ public class SessionVariable implements Serializable, Writable {
+ " use a skiplist to optimize the intersection."})
public int invertedIndexConjunctionOptThreshold = 1000;
// Session variable registered under INVERTED_INDEX_MAX_EXPANSIONS; defaults to 50.
// Per its description, it caps the number of term expansions performed during a query.
// The value is handed to tResult.setInvertedIndexMaxExpansions(...) elsewhere in this class.
@VariableMgr.VarAttr(name = INVERTED_INDEX_MAX_EXPANSIONS,
description = {"这个参数用来限制查询时扩展的词项(terms)的数量,以此来控制查询的性能",
"This parameter is used to limit the number of term expansions during a query,"
+ " thereby controlling query performance"})
public int invertedIndexMaxExpansions = 50;
@VariableMgr.VarAttr(name = SQL_DIALECT, needForward = true, checker = "checkSqlDialect",
description = {"解析sql使用的方言", "The dialect used to parse sql."})
public String sqlDialect = "doris";
@ -2635,6 +2642,7 @@ public class SessionVariable implements Serializable, Writable {
tResult.setEnableMemtableOnSinkNode(enableMemtableOnSinkNode);
tResult.setInvertedIndexConjunctionOptThreshold(invertedIndexConjunctionOptThreshold);
tResult.setInvertedIndexMaxExpansions(invertedIndexMaxExpansions);
tResult.setFasterFloatConvert(fasterFloatConvert);

View File

@ -319,6 +319,7 @@ import org.apache.doris.qe.SqlModeHelper;
keywordMap.put("match_any", new Integer(SqlParserSymbols.KW_MATCH_ANY));
keywordMap.put("match_all", new Integer(SqlParserSymbols.KW_MATCH_ALL));
keywordMap.put("match_phrase", new Integer(SqlParserSymbols.KW_MATCH_PHRASE));
keywordMap.put("match_phrase_prefix", new Integer(SqlParserSymbols.KW_MATCH_PHRASE_PREFIX));
keywordMap.put("element_eq", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_EQ));
keywordMap.put("element_lt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_LT));
keywordMap.put("element_gt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_GT));