From 0b368fbbfa2033fd117d69ce9fb2a5ab0df98ee8 Mon Sep 17 00:00:00 2001 From: Zhengguo Yang Date: Wed, 19 Oct 2022 15:40:04 +0800 Subject: [PATCH] [Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448) * [Bugfix] add negtive value check when create mv using vec --- be/src/olap/schema_change.cpp | 3 +- .../functions/function_always_not_nullable.h | 20 ++++++-- be/src/vec/functions/function_bitmap.cpp | 48 +++++++++++++++++++ .../analysis/CreateMaterializedViewStmt.java | 44 ++++++++++++----- .../doris/analysis/FunctionCallExpr.java | 4 ++ .../analysis/MVColumnBitmapUnionPattern.java | 3 +- .../org/apache/doris/catalog/Function.java | 4 ++ .../org/apache/doris/catalog/FunctionSet.java | 1 + .../mvrewrite/FunctionCallEqualRule.java | 1 + .../mvrewrite/ToBitmapToSlotRefRule.java | 3 +- gensrc/script/doris_builtins_functions.py | 6 +++ 11 files changed, 117 insertions(+), 20 deletions(-) diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 49c42615b1..3295035f4b 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -619,7 +619,8 @@ Status RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t data if (!_schema_mapping[i].materialized_function.empty()) { bool (*_do_materialized_transform)(RowCursor*, RowCursor*, const TabletColumn&, int, int, MemPool*) = nullptr; - if (_schema_mapping[i].materialized_function == "to_bitmap") { + if (_schema_mapping[i].materialized_function == "to_bitmap" || + _schema_mapping[i].materialized_function == "to_bitmap_with_check") { _do_materialized_transform = to_bitmap; } else if (_schema_mapping[i].materialized_function == "hll_hash") { _do_materialized_transform = hll_hash; diff --git a/be/src/vec/functions/function_always_not_nullable.h b/be/src/vec/functions/function_always_not_nullable.h index e15e1a91e9..d7f437bcdd 100644 --- a/be/src/vec/functions/function_always_not_nullable.h +++ b/be/src/vec/functions/function_always_not_nullable.h @@ -23,7 +23,7 @@ namespace doris::vectorized { -template +template class FunctionAlwaysNotNullable : public IFunction { public: static constexpr auto name = Function::name; @@ -62,8 +62,14 @@ public: col_nullable->get_null_map_column_ptr().get()); if (col != nullptr && col_nullmap != nullptr) { - Function::vector_nullable(col->get_chars(), col->get_offsets(), - col_nullmap->get_data(), column_result); + if constexpr (WithReturn) { + RETURN_IF_ERROR(Function::vector_nullable(col->get_chars(), col->get_offsets(), + col_nullmap->get_data(), + column_result)); + } else { + Function::vector_nullable(col->get_chars(), col->get_offsets(), + col_nullmap->get_data(), column_result); + } block.replace_by_position(result, std::move(column_result)); return Status::OK(); @@ -71,8 +77,12 @@ public: return type_error(); } } else if (const ColumnString* col = check_and_get_column(column.get())) { - Function::vector(col->get_chars(), col->get_offsets(), column_result); - + if constexpr (WithReturn) { + RETURN_IF_ERROR( + Function::vector(col->get_chars(), col->get_offsets(), column_result)); + } else { + Function::vector(col->get_chars(), col->get_offsets(), column_result); + } block.replace_by_position(result, std::move(column_result)); return Status::OK(); } else { diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 397d70aed5..61e0f99f8c 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -75,6 +75,51 @@ struct ToBitmap { } }; +struct ToBitmapWithCheck { + static constexpr auto name = "to_bitmap_with_check"; + using ReturnType = DataTypeBitMap; + + static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + MutableColumnPtr& col_res) { + return execute(data, offsets, nullptr, col_res); + } + + static Status vector_nullable(const ColumnString::Chars& data, + const ColumnString::Offsets& offsets, const NullMap& nullmap, + MutableColumnPtr& col_res) { + return execute(data, offsets, &nullmap, col_res); + } + template + static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + const NullMap* nullmap, MutableColumnPtr& col_res) { + auto* res_column = reinterpret_cast(col_res.get()); + auto& res_data = res_column->get_data(); + size_t size = offsets.size(); + + for (size_t i = 0; i < size; ++i) { + if (arg_is_nullable && ((*nullmap)[i])) { + continue; + } else { + const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); + size_t str_size = offsets[i] - offsets[i - 1]; + StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; + uint64_t int_value = StringParser::string_to_unsigned_int( + raw_str, str_size, &parse_result); + if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) { + res_data[i].add(int_value); + } else { + LOG(WARNING) << "The input: " << raw_str + << " is not valid, to_bitmap only support bigint value from 0 to " + "18446744073709551615 currently"; + return Status::InternalError( + "bitmap value must be in [0, 18446744073709551615)"); + } + } + } + return Status::OK(); + } +}; + struct BitmapFromString { static constexpr auto name = "bitmap_from_string"; @@ -536,6 +581,8 @@ public: using FunctionBitmapEmpty = FunctionConst; using FunctionToBitmap = FunctionAlwaysNotNullable; +using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable; + using FunctionBitmapFromString = FunctionBitmapAlwaysNull; using FunctionBitmapHash = FunctionAlwaysNotNullable>; using FunctionBitmapHash64 = FunctionAlwaysNotNullable>; @@ -564,6 +611,7 @@ using FunctionBitmapSubsetInRange = FunctionBitmapSubs; void register_function_bitmap(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); + factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java index 28926ac73e..8651a2eee8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMaterializedViewStmt.java @@ -64,11 +64,11 @@ public class CreateMaterializedViewStmt extends DdlStmt { static { FN_NAME_TO_PATTERN = Maps.newHashMap(); FN_NAME_TO_PATTERN.put(AggregateType.SUM.name().toLowerCase(), - new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase())); + new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase())); FN_NAME_TO_PATTERN.put(AggregateType.MIN.name().toLowerCase(), - new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase())); + new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase())); FN_NAME_TO_PATTERN.put(AggregateType.MAX.name().toLowerCase(), - new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase())); + new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase())); FN_NAME_TO_PATTERN.put(FunctionSet.COUNT, new MVColumnOneChildPattern(FunctionSet.COUNT)); FN_NAME_TO_PATTERN.put(FunctionSet.BITMAP_UNION, new MVColumnBitmapUnionPattern()); FN_NAME_TO_PATTERN.put(FunctionSet.HLL_UNION, new MVColumnHLLUnionPattern()); @@ -147,16 +147,16 @@ public class CreateMaterializedViewStmt extends DdlStmt { analyzeFromClause(); if (selectStmt.getWhereClause() != null) { throw new AnalysisException("The where clause is not supported in add materialized view clause, expr:" - + selectStmt.getWhereClause().toSql()); + + selectStmt.getWhereClause().toSql()); } if (selectStmt.getHavingPred() != null) { throw new AnalysisException("The having clause is not supported in add materialized view clause, expr:" - + selectStmt.getHavingPred().toSql()); + + selectStmt.getHavingPred().toSql()); } analyzeOrderByClause(); if (selectStmt.getLimit() != -1) { throw new AnalysisException("The limit clause is not supported in add materialized view clause, expr:" - + " limit " + selectStmt.getLimit()); + + " limit " + selectStmt.getLimit()); } } @@ -182,7 +182,7 @@ public class CreateMaterializedViewStmt extends DdlStmt { Expr selectListItemExpr = selectListItem.getExpr(); if (!(selectListItemExpr instanceof SlotRef) && !(selectListItemExpr instanceof FunctionCallExpr)) { throw new AnalysisException("The materialized view only support the single column or function expr. " - + "Error column: " + selectListItemExpr.toSql()); + + "Error column: " + selectListItemExpr.toSql()); } if (selectListItemExpr instanceof SlotRef) { if (meetAggregate) { @@ -211,6 +211,25 @@ public class CreateMaterializedViewStmt extends DdlStmt { throw new AnalysisException( "The function " + functionName + " must match pattern:" + mvColumnPattern.toString()); } + + // for bitmap_union(to_bitmap(column)) function, we should check value is not negative + // in vectorized schema_change mode, so we should rewrite the function to + // bitmap_union(to_bitmap_with_check(column)) + if (functionName.equalsIgnoreCase("bitmap_union")) { + if (functionCallExpr.getChildren().size() == 1 + && functionCallExpr.getChild(0) instanceof FunctionCallExpr) { + Expr child = functionCallExpr.getChild(0); + FunctionCallExpr childFunctionCallExpr = (FunctionCallExpr) child; + if (childFunctionCallExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) { + childFunctionCallExpr.setFnName( + new FunctionName(childFunctionCallExpr.getFnName().getDb(), + "to_bitmap_with_check")); + childFunctionCallExpr.getFn().setName( + new FunctionName(childFunctionCallExpr.getFn().getFunctionName().getDb(), + "to_bitmap_with_check")); + } + } + } } // check duplicate column List slots = new ArrayList<>(); @@ -256,7 +275,7 @@ public class CreateMaterializedViewStmt extends DdlStmt { List orderByElements = selectStmt.getOrderByElements(); if (orderByElements.size() > mvColumnItemList.size()) { throw new AnalysisException("The number of columns in order clause must be less than " + "the number of " - + "columns in select clause"); + + "columns in select clause"); } if (beginIndexOfAggregation != -1 && (orderByElements.size() != (beginIndexOfAggregation))) { throw new AnalysisException("The key of columns in mv must be all of group by columns"); @@ -265,13 +284,13 @@ public class CreateMaterializedViewStmt extends DdlStmt { Expr orderByElement = orderByElements.get(i).getExpr(); if (!(orderByElement instanceof SlotRef)) { throw new AnalysisException("The column in order clause must be original column without calculation. " - + "Error column: " + orderByElement.toSql()); + + "Error column: " + orderByElement.toSql()); } MVColumnItem mvColumnItem = mvColumnItemList.get(i); SlotRef slotRef = (SlotRef) orderByElement; if (!mvColumnItem.getName().equalsIgnoreCase(slotRef.getColumnName())) { throw new AnalysisException("The order of columns in order by clause must be same as " - + "the order of columns in select list"); + + "the order of columns in select list"); } Preconditions.checkState(mvColumnItem.getAggregationType() == null); mvColumnItem.setIsKey(true); @@ -451,7 +470,8 @@ public class CreateMaterializedViewStmt extends DdlStmt { CastExpr castExpr = new CastExpr(new TypeDef(Type.VARCHAR), baseSlotRef); List params = Lists.newArrayList(); params.add(castExpr); - FunctionCallExpr defineExpr = new FunctionCallExpr(FunctionSet.TO_BITMAP, params); + FunctionCallExpr defineExpr = + new FunctionCallExpr(FunctionSet.TO_BITMAP_WITH_CHECK, params); result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr); } else { result.put(baseColumnName, null); @@ -471,7 +491,7 @@ public class CreateMaterializedViewStmt extends DdlStmt { case FunctionSet.COUNT: Expr defineExpr = new CaseExpr(null, Lists.newArrayList( new CaseWhenClause(new IsNullPredicate(slots.get(0), false), - new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT)); + new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT)); result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr); break; default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index bef48ed4a3..6ff3404129 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -128,6 +128,10 @@ public class FunctionCallExpr extends Expr { isTableFnCall = tableFnCall; } + public void setFnName(FunctionName fnName) { + this.fnName = fnName; + } + public Function getFn() { return fn; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java index 8dabace3a1..25159fb252 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MVColumnBitmapUnionPattern.java @@ -44,7 +44,8 @@ public class MVColumnBitmapUnionPattern implements MVColumnPattern { } } else if (fnExpr.getChild(0) instanceof FunctionCallExpr) { FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0); - if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) { + if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP) + && !child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP_WITH_CHECK)) { return false; } SlotRef slotRef = child0FnExpr.getChild(0).unwrapSlotRef(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java index f1fd3c0f06..c99cc87c59 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java @@ -201,6 +201,10 @@ public class Function implements Writable { location = loc; } + public void setName(FunctionName name) { + this.name = name; + } + public TFunctionBinaryType getBinaryType() { return binaryType; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index a2d5ba356a..eab49faa5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -911,6 +911,7 @@ public class FunctionSet { .build(); public static final String TO_BITMAP = "to_bitmap"; + public static final String TO_BITMAP_WITH_CHECK = "to_bitmap_with_check"; public static final String BITMAP_UNION = "bitmap_union"; public static final String BITMAP_UNION_COUNT = "bitmap_union_count"; public static final String BITMAP_UNION_INT = "bitmap_union_int"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java index aee0871851..4e9005f339 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/FunctionCallEqualRule.java @@ -41,6 +41,7 @@ public class FunctionCallEqualRule implements MVExprEqualRule { builder.put(FunctionSet.HLL_UNION, "hll_union"); builder.put(FunctionSet.HLL_UNION, "hll_raw_agg"); builder.put(FunctionSet.TO_BITMAP, FunctionSet.TO_BITMAP); + builder.put(FunctionSet.TO_BITMAP_WITH_CHECK, FunctionSet.TO_BITMAP_WITH_CHECK); builder.put(FunctionSet.HLL_HASH, FunctionSet.HLL_HASH); columnAggTypeMatchFnName = builder.build(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java index c97f069cf8..a486c6185f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/mvrewrite/ToBitmapToSlotRefRule.java @@ -65,7 +65,8 @@ public class ToBitmapToSlotRefRule implements ExprRewriteRule { return expr; } FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0); - if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) { + if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap") + && !child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap_with_check")) { return expr; } if (child0FnExpr.getChild(0) instanceof SlotRef) { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 621bcb3764..ed71c714fc 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2421,6 +2421,9 @@ visible_functions = [ [['to_bitmap'], 'BITMAP', ['VARCHAR'], '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'], + '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE', + '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], [['bitmap_hash'], 'BITMAP', ['VARCHAR'], '_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], @@ -2430,6 +2433,9 @@ visible_functions = [ [['to_bitmap'], 'BITMAP', ['STRING'], '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], + [['to_bitmap_with_check'], 'BITMAP', ['STRING'], + '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE', + '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], [['bitmap_hash'], 'BITMAP', ['STRING'], '_ZN5doris15BitmapFunctions11bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],