[feature](inverted index) match_phrase_prefix feature added (#27404)
select count() from test_index_match_phrase_prefix where request match_phrase_prefix 'xxx';
This commit is contained in:
@ -344,6 +344,7 @@ MATCH_ELEMENT_GT: 'ELEMENT_GT';
|
||||
MATCH_ELEMENT_LE: 'ELEMENT_LE';
|
||||
MATCH_ELEMENT_LT: 'ELEMENT_LT';
|
||||
MATCH_PHRASE: 'MATCH_PHRASE';
|
||||
MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
|
||||
MATERIALIZED: 'MATERIALIZED';
|
||||
MAX: 'MAX';
|
||||
MAXVALUE: 'MAXVALUE';
|
||||
|
||||
@ -593,7 +593,7 @@ rowConstructorItem
|
||||
predicate
|
||||
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
|
||||
| NOT? kind=(LIKE | REGEXP | RLIKE) pattern=valueExpression
|
||||
| NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE) pattern=valueExpression
|
||||
| NOT? kind=(MATCH | MATCH_ANY | MATCH_ALL | MATCH_PHRASE | MATCH_PHRASE_PREFIX) pattern=valueExpression
|
||||
| NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
|
||||
| NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
|
||||
| IS NOT? kind=NULL
|
||||
|
||||
@ -478,6 +478,7 @@ terminal String
|
||||
KW_MATCH_ANY,
|
||||
KW_MATCH_ALL,
|
||||
KW_MATCH_PHRASE,
|
||||
KW_MATCH_PHRASE_PREFIX,
|
||||
KW_MATCH_ELEMENT_EQ,
|
||||
KW_MATCH_ELEMENT_LT,
|
||||
KW_MATCH_ELEMENT_GT,
|
||||
@ -975,7 +976,7 @@ precedence left KW_AND;
|
||||
precedence left KW_NOT, NOT;
|
||||
precedence left KW_BETWEEN, KW_IN, KW_IS, KW_EXISTS;
|
||||
precedence left KW_LIKE, KW_REGEXP;
|
||||
precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE;
|
||||
precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH_PHRASE_PREFIX, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE;
|
||||
precedence left EQUAL, LESSTHAN, GREATERTHAN;
|
||||
precedence left ADD, SUBTRACT;
|
||||
precedence left AT, STAR, DIVIDE, MOD, KW_DIV;
|
||||
@ -7022,6 +7023,8 @@ match_predicate ::=
|
||||
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ALL, e1, e2); :}
|
||||
| expr:e1 KW_MATCH_PHRASE expr:e2
|
||||
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE, e1, e2); :}
|
||||
| expr:e1 KW_MATCH_PHRASE_PREFIX expr:e2
|
||||
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE_PREFIX, e1, e2); :}
|
||||
| expr:e1 KW_MATCH_ELEMENT_EQ expr:e2
|
||||
{: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_EQ, e1, e2); :}
|
||||
| expr:e1 KW_MATCH_ELEMENT_LT expr:e2
|
||||
|
||||
@ -50,6 +50,7 @@ public class MatchPredicate extends Predicate {
|
||||
MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY),
|
||||
MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL),
|
||||
MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE),
|
||||
MATCH_PHRASE_PREFIX("MATCH_PHRASE_PREFIX", "match_phrase_prefix", TExprOpcode.MATCH_PHRASE_PREFIX),
|
||||
MATCH_ELEMENT_EQ("MATCH_ELEMENT_EQ", "match_element_eq", TExprOpcode.MATCH_ELEMENT_EQ),
|
||||
MATCH_ELEMENT_LT("MATCH_ELEMENT_LT", "match_element_lt", TExprOpcode.MATCH_ELEMENT_LT),
|
||||
MATCH_ELEMENT_GT("MATCH_ELEMENT_GT", "match_element_gt", TExprOpcode.MATCH_ELEMENT_GT),
|
||||
@ -147,6 +148,16 @@ public class MatchPredicate extends Predicate {
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(t), t),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_PHRASE_PREFIX.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(t, t),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_PHRASE_PREFIX.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(t), t),
|
||||
Type.BOOLEAN));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -226,6 +226,7 @@ import org.apache.doris.nereids.trees.expressions.ListQuery;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchAll;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchAny;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchPhrase;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchPhrasePrefix;
|
||||
import org.apache.doris.nereids.trees.expressions.Mod;
|
||||
import org.apache.doris.nereids.trees.expressions.Multiply;
|
||||
import org.apache.doris.nereids.trees.expressions.NamedExpression;
|
||||
@ -2821,6 +2822,12 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
|
||||
getExpression(ctx.pattern)
|
||||
);
|
||||
break;
|
||||
case DorisParser.MATCH_PHRASE_PREFIX:
|
||||
outExpression = new MatchPhrasePrefix(
|
||||
valueExpression,
|
||||
getExpression(ctx.pattern)
|
||||
);
|
||||
break;
|
||||
default:
|
||||
throw new ParseException("Unsupported predicate type: " + ctx.kind.getText(), ctx);
|
||||
}
|
||||
|
||||
@ -49,6 +49,8 @@ public abstract class Match extends BinaryOperator implements PropagateNullable
|
||||
return Operator.MATCH_ALL;
|
||||
case "MATCH_PHRASE":
|
||||
return Operator.MATCH_PHRASE;
|
||||
case "MATCH_PHRASE_PREFIX":
|
||||
return Operator.MATCH_PHRASE_PREFIX;
|
||||
default:
|
||||
throw new AnalysisException("UnSupported type for match: " + symbol);
|
||||
}
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.nereids.trees.expressions;
|
||||
|
||||
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* like expression: a MATCH_PHRASE_PREFIX 'hello w'.
|
||||
*/
|
||||
public class MatchPhrasePrefix extends Match {
|
||||
public MatchPhrasePrefix(Expression left, Expression right) {
|
||||
super(ImmutableList.of(left, right), "MATCH_PHRASE_PREFIX");
|
||||
}
|
||||
|
||||
private MatchPhrasePrefix(List<Expression> children) {
|
||||
super(children, "MATCH_PHRASE_PREFIX");
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchPhrasePrefix withChildren(List<Expression> children) {
|
||||
Preconditions.checkArgument(children.size() == 2);
|
||||
return new MatchPhrasePrefix(children);
|
||||
}
|
||||
|
||||
@Override
|
||||
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
|
||||
return visitor.visitMatchPhrasePrefix(this, context);
|
||||
}
|
||||
}
|
||||
@ -59,6 +59,7 @@ import org.apache.doris.nereids.trees.expressions.Match;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchAll;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchAny;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchPhrase;
|
||||
import org.apache.doris.nereids.trees.expressions.MatchPhrasePrefix;
|
||||
import org.apache.doris.nereids.trees.expressions.Mod;
|
||||
import org.apache.doris.nereids.trees.expressions.Multiply;
|
||||
import org.apache.doris.nereids.trees.expressions.NamedExpression;
|
||||
@ -494,6 +495,10 @@ public abstract class ExpressionVisitor<R, C>
|
||||
return visitMatch(matchPhrase, context);
|
||||
}
|
||||
|
||||
public R visitMatchPhrasePrefix(MatchPhrasePrefix matchPhrasePrefix, C context) {
|
||||
return visitMatch(matchPhrasePrefix, context);
|
||||
}
|
||||
|
||||
/* ********************************************************************************************
|
||||
* Unbound expressions
|
||||
* ********************************************************************************************/
|
||||
|
||||
@ -423,6 +423,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public static final String ENABLE_UNIQUE_KEY_PARTIAL_UPDATE = "enable_unique_key_partial_update";
|
||||
|
||||
public static final String INVERTED_INDEX_CONJUNCTION_OPT_THRESHOLD = "inverted_index_conjunction_opt_threshold";
|
||||
public static final String INVERTED_INDEX_MAX_EXPANSIONS = "inverted_index_max_expansions";
|
||||
|
||||
public static final String AUTO_ANALYZE_START_TIME = "auto_analyze_start_time";
|
||||
|
||||
@ -1316,6 +1317,12 @@ public class SessionVariable implements Serializable, Writable {
|
||||
+ " use a skiplist to optimize the intersection."})
|
||||
public int invertedIndexConjunctionOptThreshold = 1000;
|
||||
|
||||
@VariableMgr.VarAttr(name = INVERTED_INDEX_MAX_EXPANSIONS,
|
||||
description = {"这个参数用来限制查询时扩展的词项(terms)的数量,以此来控制查询的性能",
|
||||
"This parameter is used to limit the number of term expansions during a query,"
|
||||
+ " thereby controlling query performance"})
|
||||
public int invertedIndexMaxExpansions = 50;
|
||||
|
||||
@VariableMgr.VarAttr(name = SQL_DIALECT, needForward = true, checker = "checkSqlDialect",
|
||||
description = {"解析sql使用的方言", "The dialect used to parse sql."})
|
||||
public String sqlDialect = "doris";
|
||||
@ -2635,6 +2642,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
tResult.setEnableMemtableOnSinkNode(enableMemtableOnSinkNode);
|
||||
|
||||
tResult.setInvertedIndexConjunctionOptThreshold(invertedIndexConjunctionOptThreshold);
|
||||
tResult.setInvertedIndexMaxExpansions(invertedIndexMaxExpansions);
|
||||
|
||||
tResult.setFasterFloatConvert(fasterFloatConvert);
|
||||
|
||||
|
||||
@ -319,6 +319,7 @@ import org.apache.doris.qe.SqlModeHelper;
|
||||
keywordMap.put("match_any", new Integer(SqlParserSymbols.KW_MATCH_ANY));
|
||||
keywordMap.put("match_all", new Integer(SqlParserSymbols.KW_MATCH_ALL));
|
||||
keywordMap.put("match_phrase", new Integer(SqlParserSymbols.KW_MATCH_PHRASE));
|
||||
keywordMap.put("match_phrase_prefix", new Integer(SqlParserSymbols.KW_MATCH_PHRASE_PREFIX));
|
||||
keywordMap.put("element_eq", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_EQ));
|
||||
keywordMap.put("element_lt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_LT));
|
||||
keywordMap.put("element_gt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_GT));
|
||||
|
||||
Reference in New Issue
Block a user