[Bugfix](vec) Fix all create mv using to_bitmap() on negative value columns when enable_vectorized_alter_table is true (#13448)
* [Bugfix] add negtive value check when create mv using vec
This commit is contained in:
@ -619,7 +619,8 @@ Status RowBlockChanger::change_row_block(const RowBlock* ref_block, int32_t data
|
||||
if (!_schema_mapping[i].materialized_function.empty()) {
|
||||
bool (*_do_materialized_transform)(RowCursor*, RowCursor*, const TabletColumn&, int,
|
||||
int, MemPool*) = nullptr;
|
||||
if (_schema_mapping[i].materialized_function == "to_bitmap") {
|
||||
if (_schema_mapping[i].materialized_function == "to_bitmap" ||
|
||||
_schema_mapping[i].materialized_function == "to_bitmap_with_check") {
|
||||
_do_materialized_transform = to_bitmap;
|
||||
} else if (_schema_mapping[i].materialized_function == "hll_hash") {
|
||||
_do_materialized_transform = hll_hash;
|
||||
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
template <typename Function>
|
||||
template <typename Function, bool WithReturn = false>
|
||||
class FunctionAlwaysNotNullable : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = Function::name;
|
||||
@ -62,8 +62,14 @@ public:
|
||||
col_nullable->get_null_map_column_ptr().get());
|
||||
|
||||
if (col != nullptr && col_nullmap != nullptr) {
|
||||
Function::vector_nullable(col->get_chars(), col->get_offsets(),
|
||||
col_nullmap->get_data(), column_result);
|
||||
if constexpr (WithReturn) {
|
||||
RETURN_IF_ERROR(Function::vector_nullable(col->get_chars(), col->get_offsets(),
|
||||
col_nullmap->get_data(),
|
||||
column_result));
|
||||
} else {
|
||||
Function::vector_nullable(col->get_chars(), col->get_offsets(),
|
||||
col_nullmap->get_data(), column_result);
|
||||
}
|
||||
|
||||
block.replace_by_position(result, std::move(column_result));
|
||||
return Status::OK();
|
||||
@ -71,8 +77,12 @@ public:
|
||||
return type_error();
|
||||
}
|
||||
} else if (const ColumnString* col = check_and_get_column<ColumnString>(column.get())) {
|
||||
Function::vector(col->get_chars(), col->get_offsets(), column_result);
|
||||
|
||||
if constexpr (WithReturn) {
|
||||
RETURN_IF_ERROR(
|
||||
Function::vector(col->get_chars(), col->get_offsets(), column_result));
|
||||
} else {
|
||||
Function::vector(col->get_chars(), col->get_offsets(), column_result);
|
||||
}
|
||||
block.replace_by_position(result, std::move(column_result));
|
||||
return Status::OK();
|
||||
} else {
|
||||
|
||||
@ -75,6 +75,51 @@ struct ToBitmap {
|
||||
}
|
||||
};
|
||||
|
||||
struct ToBitmapWithCheck {
|
||||
static constexpr auto name = "to_bitmap_with_check";
|
||||
using ReturnType = DataTypeBitMap;
|
||||
|
||||
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
MutableColumnPtr& col_res) {
|
||||
return execute<false>(data, offsets, nullptr, col_res);
|
||||
}
|
||||
|
||||
static Status vector_nullable(const ColumnString::Chars& data,
|
||||
const ColumnString::Offsets& offsets, const NullMap& nullmap,
|
||||
MutableColumnPtr& col_res) {
|
||||
return execute<true>(data, offsets, &nullmap, col_res);
|
||||
}
|
||||
template <bool arg_is_nullable>
|
||||
static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
const NullMap* nullmap, MutableColumnPtr& col_res) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
|
||||
raw_str, str_size, &parse_result);
|
||||
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
|
||||
res_data[i].add(int_value);
|
||||
} else {
|
||||
LOG(WARNING) << "The input: " << raw_str
|
||||
<< " is not valid, to_bitmap only support bigint value from 0 to "
|
||||
"18446744073709551615 currently";
|
||||
return Status::InternalError(
|
||||
"bitmap value must be in [0, 18446744073709551615)");
|
||||
}
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
|
||||
struct BitmapFromString {
|
||||
static constexpr auto name = "bitmap_from_string";
|
||||
|
||||
@ -536,6 +581,8 @@ public:
|
||||
|
||||
using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
|
||||
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
|
||||
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck, true>;
|
||||
|
||||
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
|
||||
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
|
||||
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
|
||||
@ -564,6 +611,7 @@ using FunctionBitmapSubsetInRange = FunctionBitmapSubs<BitmapSubsetInRange>;
|
||||
void register_function_bitmap(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionBitmapEmpty>();
|
||||
factory.register_function<FunctionToBitmap>();
|
||||
factory.register_function<FunctionToBitmapWithCheck>();
|
||||
factory.register_function<FunctionBitmapFromString>();
|
||||
factory.register_function<FunctionBitmapHash>();
|
||||
factory.register_function<FunctionBitmapHash64>();
|
||||
|
||||
@ -64,11 +64,11 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
static {
|
||||
FN_NAME_TO_PATTERN = Maps.newHashMap();
|
||||
FN_NAME_TO_PATTERN.put(AggregateType.SUM.name().toLowerCase(),
|
||||
new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
|
||||
new MVColumnOneChildPattern(AggregateType.SUM.name().toLowerCase()));
|
||||
FN_NAME_TO_PATTERN.put(AggregateType.MIN.name().toLowerCase(),
|
||||
new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
|
||||
new MVColumnOneChildPattern(AggregateType.MIN.name().toLowerCase()));
|
||||
FN_NAME_TO_PATTERN.put(AggregateType.MAX.name().toLowerCase(),
|
||||
new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
|
||||
new MVColumnOneChildPattern(AggregateType.MAX.name().toLowerCase()));
|
||||
FN_NAME_TO_PATTERN.put(FunctionSet.COUNT, new MVColumnOneChildPattern(FunctionSet.COUNT));
|
||||
FN_NAME_TO_PATTERN.put(FunctionSet.BITMAP_UNION, new MVColumnBitmapUnionPattern());
|
||||
FN_NAME_TO_PATTERN.put(FunctionSet.HLL_UNION, new MVColumnHLLUnionPattern());
|
||||
@ -147,16 +147,16 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
analyzeFromClause();
|
||||
if (selectStmt.getWhereClause() != null) {
|
||||
throw new AnalysisException("The where clause is not supported in add materialized view clause, expr:"
|
||||
+ selectStmt.getWhereClause().toSql());
|
||||
+ selectStmt.getWhereClause().toSql());
|
||||
}
|
||||
if (selectStmt.getHavingPred() != null) {
|
||||
throw new AnalysisException("The having clause is not supported in add materialized view clause, expr:"
|
||||
+ selectStmt.getHavingPred().toSql());
|
||||
+ selectStmt.getHavingPred().toSql());
|
||||
}
|
||||
analyzeOrderByClause();
|
||||
if (selectStmt.getLimit() != -1) {
|
||||
throw new AnalysisException("The limit clause is not supported in add materialized view clause, expr:"
|
||||
+ " limit " + selectStmt.getLimit());
|
||||
+ " limit " + selectStmt.getLimit());
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,7 +182,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
Expr selectListItemExpr = selectListItem.getExpr();
|
||||
if (!(selectListItemExpr instanceof SlotRef) && !(selectListItemExpr instanceof FunctionCallExpr)) {
|
||||
throw new AnalysisException("The materialized view only support the single column or function expr. "
|
||||
+ "Error column: " + selectListItemExpr.toSql());
|
||||
+ "Error column: " + selectListItemExpr.toSql());
|
||||
}
|
||||
if (selectListItemExpr instanceof SlotRef) {
|
||||
if (meetAggregate) {
|
||||
@ -211,6 +211,25 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
throw new AnalysisException(
|
||||
"The function " + functionName + " must match pattern:" + mvColumnPattern.toString());
|
||||
}
|
||||
|
||||
// for bitmap_union(to_bitmap(column)) function, we should check value is not negative
|
||||
// in vectorized schema_change mode, so we should rewrite the function to
|
||||
// bitmap_union(to_bitmap_with_check(column))
|
||||
if (functionName.equalsIgnoreCase("bitmap_union")) {
|
||||
if (functionCallExpr.getChildren().size() == 1
|
||||
&& functionCallExpr.getChild(0) instanceof FunctionCallExpr) {
|
||||
Expr child = functionCallExpr.getChild(0);
|
||||
FunctionCallExpr childFunctionCallExpr = (FunctionCallExpr) child;
|
||||
if (childFunctionCallExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
|
||||
childFunctionCallExpr.setFnName(
|
||||
new FunctionName(childFunctionCallExpr.getFnName().getDb(),
|
||||
"to_bitmap_with_check"));
|
||||
childFunctionCallExpr.getFn().setName(
|
||||
new FunctionName(childFunctionCallExpr.getFn().getFunctionName().getDb(),
|
||||
"to_bitmap_with_check"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// check duplicate column
|
||||
List<SlotRef> slots = new ArrayList<>();
|
||||
@ -256,7 +275,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
List<OrderByElement> orderByElements = selectStmt.getOrderByElements();
|
||||
if (orderByElements.size() > mvColumnItemList.size()) {
|
||||
throw new AnalysisException("The number of columns in order clause must be less than " + "the number of "
|
||||
+ "columns in select clause");
|
||||
+ "columns in select clause");
|
||||
}
|
||||
if (beginIndexOfAggregation != -1 && (orderByElements.size() != (beginIndexOfAggregation))) {
|
||||
throw new AnalysisException("The key of columns in mv must be all of group by columns");
|
||||
@ -265,13 +284,13 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
Expr orderByElement = orderByElements.get(i).getExpr();
|
||||
if (!(orderByElement instanceof SlotRef)) {
|
||||
throw new AnalysisException("The column in order clause must be original column without calculation. "
|
||||
+ "Error column: " + orderByElement.toSql());
|
||||
+ "Error column: " + orderByElement.toSql());
|
||||
}
|
||||
MVColumnItem mvColumnItem = mvColumnItemList.get(i);
|
||||
SlotRef slotRef = (SlotRef) orderByElement;
|
||||
if (!mvColumnItem.getName().equalsIgnoreCase(slotRef.getColumnName())) {
|
||||
throw new AnalysisException("The order of columns in order by clause must be same as "
|
||||
+ "the order of columns in select list");
|
||||
+ "the order of columns in select list");
|
||||
}
|
||||
Preconditions.checkState(mvColumnItem.getAggregationType() == null);
|
||||
mvColumnItem.setIsKey(true);
|
||||
@ -451,7 +470,8 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
CastExpr castExpr = new CastExpr(new TypeDef(Type.VARCHAR), baseSlotRef);
|
||||
List<Expr> params = Lists.newArrayList();
|
||||
params.add(castExpr);
|
||||
FunctionCallExpr defineExpr = new FunctionCallExpr(FunctionSet.TO_BITMAP, params);
|
||||
FunctionCallExpr defineExpr =
|
||||
new FunctionCallExpr(FunctionSet.TO_BITMAP_WITH_CHECK, params);
|
||||
result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
|
||||
} else {
|
||||
result.put(baseColumnName, null);
|
||||
@ -471,7 +491,7 @@ public class CreateMaterializedViewStmt extends DdlStmt {
|
||||
case FunctionSet.COUNT:
|
||||
Expr defineExpr = new CaseExpr(null, Lists.newArrayList(
|
||||
new CaseWhenClause(new IsNullPredicate(slots.get(0), false),
|
||||
new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
|
||||
new IntLiteral(0, Type.BIGINT))), new IntLiteral(1, Type.BIGINT));
|
||||
result.put(mvColumnBuilder(functionName, baseColumnName), defineExpr);
|
||||
break;
|
||||
default:
|
||||
|
||||
@ -128,6 +128,10 @@ public class FunctionCallExpr extends Expr {
|
||||
isTableFnCall = tableFnCall;
|
||||
}
|
||||
|
||||
public void setFnName(FunctionName fnName) {
|
||||
this.fnName = fnName;
|
||||
}
|
||||
|
||||
public Function getFn() {
|
||||
return fn;
|
||||
}
|
||||
|
||||
@ -44,7 +44,8 @@ public class MVColumnBitmapUnionPattern implements MVColumnPattern {
|
||||
}
|
||||
} else if (fnExpr.getChild(0) instanceof FunctionCallExpr) {
|
||||
FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
|
||||
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
|
||||
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
|
||||
&& !child0FnExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP_WITH_CHECK)) {
|
||||
return false;
|
||||
}
|
||||
SlotRef slotRef = child0FnExpr.getChild(0).unwrapSlotRef();
|
||||
|
||||
@ -201,6 +201,10 @@ public class Function implements Writable {
|
||||
location = loc;
|
||||
}
|
||||
|
||||
public void setName(FunctionName name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public TFunctionBinaryType getBinaryType() {
|
||||
return binaryType;
|
||||
}
|
||||
|
||||
@ -911,6 +911,7 @@ public class FunctionSet<T> {
|
||||
.build();
|
||||
|
||||
public static final String TO_BITMAP = "to_bitmap";
|
||||
public static final String TO_BITMAP_WITH_CHECK = "to_bitmap_with_check";
|
||||
public static final String BITMAP_UNION = "bitmap_union";
|
||||
public static final String BITMAP_UNION_COUNT = "bitmap_union_count";
|
||||
public static final String BITMAP_UNION_INT = "bitmap_union_int";
|
||||
|
||||
@ -41,6 +41,7 @@ public class FunctionCallEqualRule implements MVExprEqualRule {
|
||||
builder.put(FunctionSet.HLL_UNION, "hll_union");
|
||||
builder.put(FunctionSet.HLL_UNION, "hll_raw_agg");
|
||||
builder.put(FunctionSet.TO_BITMAP, FunctionSet.TO_BITMAP);
|
||||
builder.put(FunctionSet.TO_BITMAP_WITH_CHECK, FunctionSet.TO_BITMAP_WITH_CHECK);
|
||||
builder.put(FunctionSet.HLL_HASH, FunctionSet.HLL_HASH);
|
||||
columnAggTypeMatchFnName = builder.build();
|
||||
}
|
||||
|
||||
@ -65,7 +65,8 @@ public class ToBitmapToSlotRefRule implements ExprRewriteRule {
|
||||
return expr;
|
||||
}
|
||||
FunctionCallExpr child0FnExpr = (FunctionCallExpr) fnExpr.getChild(0);
|
||||
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")) {
|
||||
if (!child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap")
|
||||
&& !child0FnExpr.getFnName().getFunction().equalsIgnoreCase("to_bitmap_with_check")) {
|
||||
return expr;
|
||||
}
|
||||
if (child0FnExpr.getChild(0) instanceof SlotRef) {
|
||||
|
||||
@ -2421,6 +2421,9 @@ visible_functions = [
|
||||
[['to_bitmap'], 'BITMAP', ['VARCHAR'],
|
||||
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
|
||||
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
|
||||
[['to_bitmap_with_check'], 'BITMAP', ['VARCHAR'],
|
||||
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
|
||||
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
|
||||
[['bitmap_hash'], 'BITMAP', ['VARCHAR'],
|
||||
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
|
||||
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
|
||||
@ -2430,6 +2433,9 @@ visible_functions = [
|
||||
[['to_bitmap'], 'BITMAP', ['STRING'],
|
||||
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
|
||||
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
|
||||
[['to_bitmap_with_check'], 'BITMAP', ['STRING'],
|
||||
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
|
||||
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
|
||||
[['bitmap_hash'], 'BITMAP', ['STRING'],
|
||||
'_ZN5doris15BitmapFunctions11bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE',
|
||||
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
|
||||
|
||||
Reference in New Issue
Block a user