[Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)

* [Bugfix] add negtive value check when create mv using vec
This commit is contained in:
Zhengguo Yang
2022-10-19 15:40:04 +08:00
committed by GitHub
parent 5423de68dd
commit 0b368fbbfa
11 changed files with 117 additions and 20 deletions

View File

@ -619,7 +619,8 @@ Status RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t data
if (!_schema_mapping[i].materialized_function.empty()) {
bool (*_do_materialized_transform)(RowCursor*, RowCursor*, const TabletColumn&, int,
int, MemPool*) = nullptr;
if (_schema_mapping[i].materialized_function == "to_bitmap") {
if (_schema_mapping[i].materialized_function == "to_bitmap" ||
_schema_mapping[i].materialized_function == "to_bitmap_with_check") {
_do_materialized_transform = to_bitmap;
} else if (_schema_mapping[i].materialized_function == "hll_hash") {
_do_materialized_transform = hll_hash;

View File

@ -23,7 +23,7 @@
namespace doris::vectorized {
template <typename Function>
template <typename Function, bool WithReturn = false>
class FunctionAlwaysNotNullable : public IFunction {
public:
static constexpr auto name = Function::name;
@ -62,8 +62,14 @@ public:
col_nullable->get_null_map_column_ptr().get());
if (col != nullptr && col_nullmap != nullptr) {
Function::vector_nullable(col->get_chars(), col->get_offsets(),
col_nullmap->get_data(), column_result);
if constexpr (WithReturn) {
RETURN_IF_ERROR(Function::vector_nullable(col->get_chars(), col->get_offsets(),
col_nullmap->get_data(),
column_result));
} else {
Function::vector_nullable(col->get_chars(), col->get_offsets(),
col_nullmap->get_data(), column_result);
}
block.replace_by_position(result, std::move(column_result));
return Status::OK();
@ -71,8 +77,12 @@ public:
return type_error();
}
} else if (const ColumnString* col = check_and_get_column<ColumnString>(column.get())) {
Function::vector(col->get_chars(), col->get_offsets(), column_result);
if constexpr (WithReturn) {
RETURN_IF_ERROR(
Function::vector(col->get_chars(), col->get_offsets(), column_result));
} else {
Function::vector(col->get_chars(), col->get_offsets(), column_result);
}
block.replace_by_position(result, std::move(column_result));
return Status::OK();
} else {

View File

@ -75,6 +75,51 @@ struct ToBitmap {
}
};
struct ToBitmapWithCheck {
static constexpr auto name = "to_bitmap_with_check";
using ReturnType = DataTypeBitMap;
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
MutableColumnPtr& col_res) {
return execute<false>(data, offsets, nullptr, col_res);
}
static Status vector_nullable(const ColumnString::Chars& data,
const ColumnString::Offsets& offsets, const NullMap& nullmap,
MutableColumnPtr& col_res) {
return execute<true>(data, offsets, &nullmap, col_res);
}
template <bool arg_is_nullable>
static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
const NullMap* nullmap, MutableColumnPtr& col_res) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
raw_str, str_size, &parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
} else {
LOG(WARNING) << "The input: " << raw_str
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently";
return Status::InternalError(
"bitmap value must be in [0, 18446744073709551615)");
}
}
}
return Status::OK();
}
};
struct BitmapFromString {
static constexpr auto name = "bitmap_from_string";
@ -536,6 +581,8 @@ public:
using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
@ -564,6 +611,7 @@ using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>;
void register_function_bitmap(SimpleFunctionFactory& factory) {
factory.register_function<FunctionBitmapEmpty>();
factory.register_function<FunctionToBitmap>();
factory.register_function<FunctionToBitmapWithCheck>();
factory.register_function<FunctionBitmapFromString>();
factory.register_function<FunctionBitmapHash>();
factory.register_function<FunctionBitmapHash64>();

View File

@ -64,11 +64,11 @@ public class CreateMaterializedViewStmt extends DdlStmt {
static {
FN_NAME_TO_PATTERN = Maps.newHashMap();
FN_NAME_TO_PATTERN.put(AggregateType.SUM.name().toLowerCase(),
new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
FN_NAME_TO_PATTERN.put(AggregateType.MIN.name().toLowerCase(),
new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
FN_NAME_TO_PATTERN.put(AggregateType.MAX.name().toLowerCase(),
new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
FN_NAME_TO_PATTERN.put(FunctionSet.COUNT, new MVColumnOneChildPattern(FunctionSet.COUNT));
FN_NAME_TO_PATTERN.put(FunctionSet.BITMAP_UNION, new MVColumnBitmapUnionPattern());
FN_NAME_TO_PATTERN.put(FunctionSet.HLL_UNION, new MVColumnHLLUnionPattern());
@ -147,16 +147,16 @@ public class CreateMaterializedViewStmt extends DdlStmt {
analyzeFromClause();
if (selectStmt.getWhereClause() != null) {
throw new AnalysisException("The where clause is not supported in add materialized view clause, expr:"
+ selectStmt.getWhereClause().toSql());
+ selectStmt.getWhereClause().toSql());
}
if (selectStmt.getHavingPred() != null) {
throw new AnalysisException("The having clause is not supported in add materialized view clause, expr:"
+ selectStmt.getHavingPred().toSql());
+ selectStmt.getHavingPred().toSql());
}
analyzeOrderByClause();
if (selectStmt.getLimit() != -1) {
throw new AnalysisException("The limit clause is not supported in add materialized view clause, expr:"
+ " limit " + selectStmt.getLimit());
+ " limit " + selectStmt.getLimit());
}
}
@ -182,7 +182,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
Expr selectListItemExpr = selectListItem.getExpr();
if (!(selectListItemExpr instanceof SlotRef) && !(selectListItemExpr instanceof FunctionCallExpr)) {
throw new AnalysisException("The materialized view only support the single column or function expr. "
+ "Error column: " + selectListItemExpr.toSql());
+ "Error column: " + selectListItemExpr.toSql());
}
if (selectListItemExpr instanceof SlotRef) {
if (meetAggregate) {
@ -211,6 +211,25 @@ public class CreateMaterializedViewStmt extends DdlStmt {
throw new AnalysisException(
"The function " + functionName + " must match pattern:" + mvColumnPattern.toString());
}
// for bitmap_union(to_bitmap(column)) function, we should check value is not negative
// in vectorized schema_change mode, so we should rewrite the function to
// bitmap_union(to_bitmap_with_check(column))
if (functionName.equalsIgnoreCase("bitmap_union")) {
if (functionCallExpr.getChildren().size() == 1
&& functionCallExpr.getChild(0) instanceof FunctionCallExpr) {
Expr child = functionCallExpr.getChild(0);
FunctionCallExpr childFunctionCallExpr = (FunctionCallExpr) child;
if (childFunctionCallExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
childFunctionCallExpr.setFnName(
new FunctionName(childFunctionCallExpr.getFnName().getDb(),
"to_bitmap_with_check"));
childFunctionCallExpr.getFn().setName(
new FunctionName(childFunctionCallExpr.getFn().getFunctionName().getDb(),
"to_bitmap_with_check"));
}
}
}
}
// check duplicate column
List<SlotRef> slots = new ArrayList<>();
@ -256,7 +275,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
List<OrderByElement> orderByElements = selectStmt.getOrderByElements();
if (orderByElements.size() > mvColumnItemList.size()) {
throw new AnalysisException("The number of columns in order clause must be less than " + "the number of "
+ "columns in select clause");
+ "columns in select clause");
}
if (beginIndexOfAggregation != -1 && (orderByElements.size() != (beginIndexOfAggregation))) {
throw new AnalysisException("The key of columns in mv must be all of group by columns");
@ -265,13 +284,13 @@ public class CreateMaterializedViewStmt extends DdlStmt {
Expr orderByElement = orderByElements.get(i).getExpr();
if (!(orderByElement instanceof SlotRef)) {
throw new AnalysisException("The column in order clause must be original column without calculation. "
+ "Error column: " + orderByElement.toSql());
+ "Error column: " + orderByElement.toSql());
}
MVColumnItem mvColumnItem = mvColumnItemList.get(i);
SlotRef slotRef = (SlotRef) orderByElement;
if (!mvColumnItem.getName().equalsIgnoreCase(slotRef.getColumnName())) {
throw new AnalysisException("The order of columns in order by clause must be same as "
+ "the order of columns in select list");
+ "the order of columns in select list");
}
Preconditions.checkState(mvColumnItem.getAggregationType() == null);
mvColumnItem.setIsKey(true);
@ -451,7 +470,8 @@ public class CreateMaterializedViewStmt extends DdlStmt {
CastExpr castExpr = new CastExpr(new TypeDef(Type.VARCHAR), baseSlotRef);
List<Expr> params = Lists.newArrayList();
params.add(castExpr);
FunctionCallExpr defineExpr = new FunctionCallExpr(FunctionSet.TO_BITMAP, params);
FunctionCallExpr defineExpr =
new FunctionCallExpr(FunctionSet.TO_BITMAP_WITH_CHECK, params);
result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
} else {
result.put(baseColumnName, null);
@ -471,7 +491,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
case FunctionSet.COUNT:
Expr defineExpr = new CaseExpr(null, Lists.newArrayList(
new CaseWhenClause(new IsNullPredicate(slots.get(0), false),
new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
break;
default:

View File

@ -128,6 +128,10 @@ public class FunctionCallExpr extends Expr {
isTableFnCall = tableFnCall;
}
public void setFnName(FunctionName fnName) {
this.fnName = fnName;
}
public Function getFn() {
return fn;
}

View File

@ -44,7 +44,8 @@ public class MVColumnBitmapUnionPattern implements MVColumnPattern {
}
} else if (fnExpr.getChild(0) instanceof FunctionCallExpr) {
FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
&& !child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP_WITH_CHECK)) {
return false;
}
SlotRef slotRef = child0FnExpr.getChild(0).unwrapSlotRef();

View File

@ -201,6 +201,10 @@ public class Function implements Writable {
location = loc;
}
public void setName(FunctionName name) {
this.name = name;
}
public TFunctionBinaryType getBinaryType() {
return binaryType;
}

View File

@ -911,6 +911,7 @@ public class FunctionSet<T> {
.build();
public static final String TO_BITMAP = "to_bitmap";
public static final String TO_BITMAP_WITH_CHECK = "to_bitmap_with_check";
public static final String BITMAP_UNION = "bitmap_union";
public static final String BITMAP_UNION_COUNT = "bitmap_union_count";
public static final String BITMAP_UNION_INT = "bitmap_union_int";

View File

@ -41,6 +41,7 @@ public class FunctionCallEqualRule implements MVExprEqualRule {
builder.put(FunctionSet.HLL_UNION, "hll_union");
builder.put(FunctionSet.HLL_UNION, "hll_raw_agg");
builder.put(FunctionSet.TO_BITMAP, FunctionSet.TO_BITMAP);
builder.put(FunctionSet.TO_BITMAP_WITH_CHECK, FunctionSet.TO_BITMAP_WITH_CHECK);
builder.put(FunctionSet.HLL_HASH, FunctionSet.HLL_HASH);
columnAggTypeMatchFnName = builder.build();
}

View File

@ -65,7 +65,8 @@ public class ToBitmapToSlotRefRule implements ExprRewriteRule {
return expr;
}
FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")
&& !child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap_with_check")) {
return expr;
}
if (child0FnExpr.getChild(0) instanceof SlotRef) {

View File

@ -2421,6 +2421,9 @@ visible_functions = [
[['to_bitmap'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
@ -2430,6 +2433,9 @@ visible_functions = [
[['to_bitmap'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap_with_check'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions11bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],