[Feature-WIP](inverted index) support array type for inverted index reader (#16355)
This commit is contained in:
@ -49,6 +49,17 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
|
||||
if (slot->type().is_string_type() && expr->type().is_string_type()) {
|
||||
return true;
|
||||
}
|
||||
if (slot->type().is_array_type()) {
|
||||
if (slot->type().children[0].type == expr->type().type) {
|
||||
return true;
|
||||
}
|
||||
if (slot->type().children[0].is_date_type() && expr->type().is_date_type()) {
|
||||
return true;
|
||||
}
|
||||
if (slot->type().children[0].is_string_type() && expr->type().is_string_type()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -391,7 +402,14 @@ Status VScanNode::_normalize_conjuncts() {
|
||||
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
|
||||
|
||||
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
|
||||
switch (slots[slot_idx]->type().type) {
|
||||
auto type = slots[slot_idx]->type().type;
|
||||
if (slots[slot_idx]->type().type == TYPE_ARRAY) {
|
||||
type = slots[slot_idx]->type().children[0].type;
|
||||
if (type == TYPE_ARRAY) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
switch (type) {
|
||||
#define M(NAME) \
|
||||
case TYPE_##NAME: { \
|
||||
ColumnValueRange<TYPE_##NAME> range(slots[slot_idx]->col_name(), \
|
||||
|
||||
@ -109,6 +109,7 @@ public abstract class Type {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(Type.class);
|
||||
private static final ArrayList<ScalarType> integerTypes;
|
||||
private static final ArrayList<ScalarType> stringTypes;
|
||||
private static final ArrayList<ScalarType> numericTypes;
|
||||
private static final ArrayList<ScalarType> numericDateTimeTypes;
|
||||
private static final ArrayList<ScalarType> supportedTypes;
|
||||
@ -123,6 +124,11 @@ public abstract class Type {
|
||||
integerTypes.add(BIGINT);
|
||||
integerTypes.add(LARGEINT);
|
||||
|
||||
stringTypes = Lists.newArrayList();
|
||||
stringTypes.add(CHAR);
|
||||
stringTypes.add(VARCHAR);
|
||||
stringTypes.add(STRING);
|
||||
|
||||
numericTypes = Lists.newArrayList();
|
||||
numericTypes.addAll(integerTypes);
|
||||
numericTypes.add(FLOAT);
|
||||
@ -207,6 +213,10 @@ public abstract class Type {
|
||||
return integerTypes;
|
||||
}
|
||||
|
||||
/**
 * Returns the list of string scalar types.
 *
 * <p>The static initializer visible in this class fills the list with
 * CHAR, VARCHAR and STRING. The method hands back the static backing list
 * itself rather than a copy, so a caller that mutates the result changes
 * what every later caller sees.
 *
 * @return the shared list of string scalar types
 */
public static ArrayList<ScalarType> getStringTypes() {
|
||||
return stringTypes;
|
||||
}
|
||||
|
||||
/**
 * Returns the list of numeric scalar types.
 *
 * <p>The visible part of the static initializer seeds the list with all
 * integer types and then FLOAT; further entries may be appended by code
 * outside this view (NOTE(review): confirm against the full initializer).
 * The method hands back the static backing list itself rather than a copy.
 *
 * @return the shared list of numeric scalar types
 */
public static ArrayList<ScalarType> getNumericTypes() {
|
||||
return numericTypes;
|
||||
}
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.doris.analysis;
|
||||
|
||||
import org.apache.doris.catalog.ArrayType;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.KeysType;
|
||||
import org.apache.doris.catalog.PrimitiveType;
|
||||
@ -176,6 +177,9 @@ public class IndexDef {
|
||||
|| indexType == IndexType.NGRAM_BF) {
|
||||
String indexColName = column.getName();
|
||||
PrimitiveType colType = column.getDataType();
|
||||
if (indexType == IndexType.INVERTED && colType.isArrayType()) {
|
||||
colType = ((ArrayType) column.getType()).getItemType().getPrimitiveType();
|
||||
}
|
||||
if (!(colType.isDateType() || colType.isDecimalV2Type() || colType.isDecimalV3Type()
|
||||
|| colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN)) {
|
||||
throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. "
|
||||
|
||||
@ -107,38 +107,41 @@ public class MatchPredicate extends Predicate {
|
||||
Lists.<Type>newArrayList(new ArrayType(t), t),
|
||||
Type.BOOLEAN));
|
||||
}
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ANY.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ANY.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR),
|
||||
Type.BOOLEAN));
|
||||
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ALL.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ALL.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR),
|
||||
Type.BOOLEAN));
|
||||
for (Type t : Type.getStringTypes()) {
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ANY.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(t, t),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ANY.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(t), t),
|
||||
Type.BOOLEAN));
|
||||
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_PHRASE.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_PHRASE.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ALL.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(t, t),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_ALL.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(t), t),
|
||||
Type.BOOLEAN));
|
||||
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_PHRASE.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(t, t),
|
||||
Type.BOOLEAN));
|
||||
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
|
||||
Operator.MATCH_PHRASE.getName(),
|
||||
symbolNotUsed,
|
||||
Lists.<Type>newArrayList(new ArrayType(t), t),
|
||||
Type.BOOLEAN));
|
||||
}
|
||||
}
|
||||
|
||||
private final Operator op;
|
||||
@ -219,7 +222,7 @@ public class MatchPredicate extends Predicate {
|
||||
collectChildReturnTypes(), Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
|
||||
if (fn == null) {
|
||||
throw new AnalysisException(
|
||||
"no function found for " + op.toString() + " " + toSql());
|
||||
"no function found for " + op.toString() + "," + toSql());
|
||||
}
|
||||
Expr e1 = getChild(0);
|
||||
Expr e2 = getChild(1);
|
||||
|
||||
58
regression-test/data/inverted_index_p0/test_array_index.out
Normal file
58
regression-test/data/inverted_index_p0/test_array_index.out
Normal file
@ -0,0 +1,58 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
|
||||
-- !sql --
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
|
||||
-- !sql --
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
|
||||
-- !sql --
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
3 [30, 40, 50] \N
|
||||
4 [40, 50, 60] \N
|
||||
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
3 [30, 40, 50] \N
|
||||
4 [40, 50, 60] \N
|
||||
|
||||
-- !sql --
|
||||
3 [30, 40, 50] \N
|
||||
4 [40, 50, 60] \N
|
||||
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
3 [30, 40, 50] \N
|
||||
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
|
||||
-- !sql --
|
||||
1 [10, 20, 30] ['i', 'love', 'china']
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
3 [30, 40, 50] \N
|
||||
|
||||
-- !sql --
|
||||
2 [20, 30, 40] ['i', 'love', 'north korea']
|
||||
3 [30, 40, 50] \N
|
||||
4 [40, 50, 60] \N
|
||||
|
||||
-- !sql --
|
||||
3 [30, 40, 50] \N
|
||||
4 [40, 50, 60] \N
|
||||
|
||||
-- !sql --
|
||||
4 [40, 50, 60] \N
|
||||
|
||||
@ -0,0 +1,70 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_array_index") {
    // Regression suite for inverted indexes built on ARRAY columns.
    // It creates a table with an inverted index on an array<varchar> column
    // (english parser) and on an array<int> column, loads four rows in two
    // separate INSERTs, and then checks MATCH and element_* predicates
    // against the recorded results in test_array_index.out.
    //
    // Every checked query carries ORDER BY id: the rows come from two
    // separate loads and several queries return more than one row, so
    // without an explicit ordering the comparison with the .out file would
    // depend on scan order. The recorded results are already in id order.

    def indexTblName = "array_test"

    // prepare test table
    sql "DROP TABLE IF EXISTS ${indexTblName}"
    // create 1 replica table
    sql """
        CREATE TABLE IF NOT EXISTS ${indexTblName}(
            `id`int(11)NULL,
            `int_array` array<int(20)> NULL,
            `c_array` array<varchar(20)> NULL,
            INDEX c_array_idx(`c_array`) USING INVERTED PROPERTIES("parser"="english") COMMENT 'c_array index',
            INDEX int_array_idx(`int_array`) USING INVERTED COMMENT 'int_array index'
        ) ENGINE=OLAP
        DUPLICATE KEY(`id`)
        COMMENT 'OLAP'
        DISTRIBUTED BY HASH(`id`) BUCKETS 1
        PROPERTIES(
            "replication_allocation" = "tag.location.default: 1",
            "persistent"="false"
        );
    """

    // set enable_vectorized_engine=true
    sql """ SET enable_vectorized_engine=true; """
    // Dump the session variables into the test log to help diagnose failures.
    def var_result = sql "show variables"
    logger.info("show variales result: " + var_result )

    // Rows 3 and 4 have a NULL c_array; row 4 is loaded by a second INSERT.
    sql "INSERT INTO ${indexTblName} VALUES (1, [10,20,30], ['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, [30,40,50], NULL);"
    sql "INSERT INTO ${indexTblName} VALUES (4, [40,50,60], NULL);"

    // Full-text MATCH on the string-array column.
    qt_sql "SELECT * FROM ${indexTblName} WHERE c_array MATCH 'china' ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE c_array MATCH 'love' ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE c_array MATCH 'north' ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE c_array MATCH 'korea' ORDER BY id;"

    // Range predicates on the int-array column.
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_ge 40 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_le 40 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_gt 40 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_lt 40 ORDER BY id;"

    // Equality predicates on the int-array column, one per distinct element value.
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_eq 10 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_eq 20 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_eq 30 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_eq 40 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_eq 50 ORDER BY id;"
    qt_sql "SELECT * FROM ${indexTblName} WHERE int_array element_eq 60 ORDER BY id;"
}
|
||||
Reference in New Issue
Block a user