[Feature-WIP](inverted index) support array type for inverted index reader (#16355)

This commit is contained in:
YueW
2023-02-02 16:14:14 +08:00
committed by GitHub
parent a69c0f28ca
commit bb179b77f7
6 changed files with 195 additions and 32 deletions

View File

@ -49,6 +49,17 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
if (slot->type().is_string_type() && expr->type().is_string_type()) {
return true;
}
if (slot->type().is_array_type()) {
if (slot->type().children[0].type == expr->type().type) {
return true;
}
if (slot->type().children[0].is_date_type() && expr->type().is_date_type()) {
return true;
}
if (slot->type().children[0].is_string_type() && expr->type().is_string_type()) {
return true;
}
}
return false;
}
@ -391,7 +402,14 @@ Status VScanNode::_normalize_conjuncts() {
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
switch (slots[slot_idx]->type().type) {
auto type = slots[slot_idx]->type().type;
if (slots[slot_idx]->type().type == TYPE_ARRAY) {
type = slots[slot_idx]->type().children[0].type;
if (type == TYPE_ARRAY) {
continue;
}
}
switch (type) {
#define M(NAME) \
case TYPE_##NAME: { \
ColumnValueRange<TYPE_##NAME> range(slots[slot_idx]->col_name(), \

View File

@ -109,6 +109,7 @@ public abstract class Type {
private static final Logger LOG = LogManager.getLogger(Type.class);
private static final ArrayList<ScalarType> integerTypes;
private static final ArrayList<ScalarType> stringTypes;
private static final ArrayList<ScalarType> numericTypes;
private static final ArrayList<ScalarType> numericDateTimeTypes;
private static final ArrayList<ScalarType> supportedTypes;
@ -123,6 +124,11 @@ public abstract class Type {
integerTypes.add(BIGINT);
integerTypes.add(LARGEINT);
stringTypes = Lists.newArrayList();
stringTypes.add(CHAR);
stringTypes.add(VARCHAR);
stringTypes.add(STRING);
numericTypes = Lists.newArrayList();
numericTypes.addAll(integerTypes);
numericTypes.add(FLOAT);
@ -207,6 +213,10 @@ public abstract class Type {
return integerTypes;
}
/**
 * Returns the list of string scalar types held in the static {@code stringTypes} field.
 *
 * <p>The returned object is the class's own list instance, not a copy, so any
 * modification made by a caller is visible to every other user of this method.
 *
 * @return the shared list of string types
 */
public static ArrayList<ScalarType> getStringTypes() {
return stringTypes;
}
/**
 * Returns the list of numeric scalar types held in the static {@code numericTypes} field.
 *
 * <p>The returned object is the class's own list instance, not a copy, so any
 * modification made by a caller is visible to every other user of this method.
 *
 * @return the shared list of numeric types
 */
public static ArrayList<ScalarType> getNumericTypes() {
return numericTypes;
}

View File

@ -17,6 +17,7 @@
package org.apache.doris.analysis;
import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.PrimitiveType;
@ -176,6 +177,9 @@ public class IndexDef {
|| indexType == IndexType.NGRAM_BF) {
String indexColName = column.getName();
PrimitiveType colType = column.getDataType();
if (indexType == IndexType.INVERTED && colType.isArrayType()) {
colType = ((ArrayType) column.getType()).getItemType().getPrimitiveType();
}
if (!(colType.isDateType() || colType.isDecimalV2Type() || colType.isDecimalV3Type()
|| colType.isFixedPointType() || colType.isStringType() || colType == PrimitiveType.BOOLEAN)) {
throw new AnalysisException(colType + " is not supported in " + indexType.toString() + " index. "

View File

@ -107,38 +107,41 @@ public class MatchPredicate extends Predicate {
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
}
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ANY.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ANY.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ALL.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ALL.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR),
Type.BOOLEAN));
for (Type t : Type.getStringTypes()) {
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ANY.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(t, t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ANY.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_PHRASE.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_PHRASE.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ALL.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(t, t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_ALL.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_PHRASE.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(t, t),
Type.BOOLEAN));
functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator(
Operator.MATCH_PHRASE.getName(),
symbolNotUsed,
Lists.<Type>newArrayList(new ArrayType(t), t),
Type.BOOLEAN));
}
}
private final Operator op;
@ -219,7 +222,7 @@ public class MatchPredicate extends Predicate {
collectChildReturnTypes(), Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
if (fn == null) {
throw new AnalysisException(
"no function found for " + op.toString() + " " + toSql());
"no function found for " + op.toString() + "," + toSql());
}
Expr e1 = getChild(0);
Expr e2 = getChild(1);

View File

@ -0,0 +1,58 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
2 [20, 30, 40] ['i', 'love', 'north korea']
-- !sql --
2 [20, 30, 40] ['i', 'love', 'north korea']
-- !sql --
2 [20, 30, 40] ['i', 'love', 'north korea']
-- !sql --
2 [20, 30, 40] ['i', 'love', 'north korea']
3 [30, 40, 50] \N
4 [40, 50, 60] \N
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
2 [20, 30, 40] ['i', 'love', 'north korea']
3 [30, 40, 50] \N
4 [40, 50, 60] \N
-- !sql --
3 [30, 40, 50] \N
4 [40, 50, 60] \N
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
2 [20, 30, 40] ['i', 'love', 'north korea']
3 [30, 40, 50] \N
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
2 [20, 30, 40] ['i', 'love', 'north korea']
-- !sql --
1 [10, 20, 30] ['i', 'love', 'china']
2 [20, 30, 40] ['i', 'love', 'north korea']
3 [30, 40, 50] \N
-- !sql --
2 [20, 30, 40] ['i', 'love', 'north korea']
3 [30, 40, 50] \N
4 [40, 50, 60] \N
-- !sql --
3 [30, 40, 50] \N
4 [40, 50, 60] \N
-- !sql --
4 [40, 50, 60] \N

View File

@ -0,0 +1,70 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite for inverted indexes declared on ARRAY columns.
//
// It creates a single-bucket, single-replica duplicate-key table with two
// indexed array columns:
//   - `c_array`   array<varchar(20)>, inverted index using the "english" parser
//   - `int_array` array<int(20)>,     inverted index without a parser
// It then loads four rows (rows 3 and 4 have a NULL `c_array`) and runs
// MATCH queries against `c_array` followed by element_* comparison queries
// against `int_array`.
//
// The order of the qt_sql statements is significant: the recorded expected
// output lists one result section per query, in the same order.
suite("test_array_index"){
    // Table used by every statement in this suite.
    def indexTblName = "array_test"

    // Start from a clean state so reruns do not see rows from an earlier run.
    sql "DROP TABLE IF EXISTS ${indexTblName}"
    // create 1 replica table
    sql """
CREATE TABLE IF NOT EXISTS ${indexTblName}(
`id`int(11)NULL,
`int_array` array<int(20)> NULL,
`c_array` array<varchar(20)> NULL,
INDEX c_array_idx(`c_array`) USING INVERTED PROPERTIES("parser"="english") COMMENT 'c_array index',
INDEX int_array_idx(`int_array`) USING INVERTED COMMENT 'int_array index'
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES(
"replication_allocation" = "tag.location.default: 1",
"persistent"="false"
);
"""

    // set enable_vectorized_engine=true
    sql """ SET enable_vectorized_engine=true; """
    // Dump the session variables into the test log to help diagnose failures.
    def var_result = sql "show variables"
    logger.info("show variales result: " + var_result )

    // Test data: ids 1-2 carry string arrays, ids 3-4 have a NULL c_array.
    sql "INSERT INTO $indexTblName VALUES (1, [10,20,30], ['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, [30,40,50], NULL);"
    sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);"

    // Full-text MATCH on the string array column.
    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china';"
    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love';"
    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north';"
    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea';"

    // Range comparisons on the integer array column.
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40;"

    // Equality comparisons, one for each distinct value stored in int_array.
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50;"
    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60;"
}