diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp index 6a45fc5c72..107362af52 100644 --- a/be/src/exprs/bitmap_function.cpp +++ b/be/src/exprs/bitmap_function.cpp @@ -30,6 +30,17 @@ void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) { dst->ptr = (uint8_t*)new RoaringBitmap(); } +// Serializes a default-constructed RoaringBitmap and returns the bytes as a StringVal. +// The buffer is resized to bitmap.size() before serialize() writes into it. +StringVal BitmapFunctions::bitmap_empty(FunctionContext* ctx) { + RoaringBitmap bitmap; + std::string buf; + buf.resize(bitmap.size()); + // Write through &buf[0]; the pointer returned by c_str() is const and must not be written to. + bitmap.serialize(&buf[0]); + return AnyValUtil::from_string_temp(ctx, buf); +} + template void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst) { if (src.is_null) { diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h index 65b866df65..a8fa9238e2 100644 --- a/be/src/exprs/bitmap_function.h +++ b/be/src/exprs/bitmap_function.h @@ -26,6 +26,7 @@ class BitmapFunctions { public: static void init(); static void bitmap_init(FunctionContext* ctx, StringVal* slot); + static StringVal bitmap_empty(FunctionContext* ctx); template static void bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst); // the input src's ptr need to point a RoaringBitmap, this function will release the diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp index 0c0a45adcc..228a1042c0 100644 --- a/be/test/exprs/bitmap_function_test.cpp +++ b/be/test/exprs/bitmap_function_test.cpp @@ -52,6 +52,15 @@ private: FunctionContext* ctx; }; +TEST_F(BitmapFunctionsTest, bitmap_empty) { + StringVal result = BitmapFunctions::bitmap_empty(ctx); + + RoaringBitmap bitmap; + StringVal expected = convert_bitmap_to_string(ctx, bitmap); + + ASSERT_EQ(expected, result); +} + TEST_F(BitmapFunctionsTest, to_bitmap) { StringVal input = AnyValUtil::from_string_temp(ctx, std::string("1024")); StringVal result = BitmapFunctions::to_bitmap(ctx, input); diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md index f5a916d1c7..be6a3b0345 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/bitmap.md @@ -31,6 +31,8 @@ under the License. `BITMAP_UNION_INT(expr)` : 计算TINYINT,SMALLINT和INT类型的列中不同值的个数,返回值和 COUNT(DISTINCT expr)相同 +`BITMAP_EMPTY()`: 生成空Bitmap列,用于insert或导入时填充默认值 + 注意: @@ -85,4 +87,4 @@ mysql> select bitmap_count(bitmap_union(id2)) from bitmap_test; ## keyword -BITMAP,BITMAP_COUNT,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP +BITMAP,BITMAP_COUNT,BITMAP_EMPTY,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md index edb642859c..cd392665f6 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md @@ -111,14 +111,14 @@ under the License. 6. 使用streaming方式导入(用户是defalut_cluster中的) seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load - 7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用empty_hll补充数据中没有的列 - curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load + 7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用hll_empty补充数据中没有的列 + curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load 8. 
导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load - 9. 导入含有聚合模型为BITMAP_UNION列的表,可以是表中的列或者数据中的列用于生成BITMAP_UNION列 - curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load + 9. 导入含有BITMAP列的表,可以是表中的列或者数据中的列用于生成BITMAP列,也可以使用bitmap_empty填充空的Bitmap + curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load ## keyword diff --git a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md index f711f91c69..bdfc36a159 100644 --- a/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md +++ b/docs/documentation/en/sql-reference/sql-functions/aggregate-functions/bitmap_EN.md @@ -31,6 +31,8 @@ under the License. `BITMAP_UNION_INT(expr)` : Calculate the distinct value number of TINYINT,SMALLINT and INT type column. Same as COUNT(DISTINCT expr) +`BITMAP_EMPTY()`: Generate an empty bitmap column, used to fill in default values for INSERT INTO or data loading. + Notice: 1. TO_BITMAP function only receives TINYINT,SMALLINT,INT. diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md index 907869c86b..3d04ca9eb2 100644 --- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD_EN.md @@ -168,17 +168,17 @@ Where url is the url given by ErrorURL. ```Seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load``` -7. 
load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use empty_hll to supplement columns that are not in the data +7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns; you can also use hll_empty to supplement columns that are not in the data - ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load``` + ```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load``` 8. load data for strict mode filtering and set the time zone to Africa/Abidjan ```Curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load``` -9. load a table with an aggregate model of `BITMAP_UNION`, either a column in the table or a column in the data to generate a `BITMAP_UNION` column +9. 
load a table with BITMAP columns, which can be columns in the table or columns in the data used to generate BITMAP columns; you can also use bitmap_empty to supplement columns that are not in the data - ```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load``` + ```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load``` ## keyword diff --git a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java index c823e1fa94..b456310e5f 100644 --- a/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java +++ b/fe/src/main/java/org/apache/doris/analysis/InsertStmt.java @@ -642,8 +642,9 @@ public class InsertStmt extends DdlStmt { private void checkBitmapCompatibility(Column col, Expr expr) throws AnalysisException { boolean isCompatible = false; - final String bitmapMismatchLog = "Column's agg type is bitmap_union," - + " SelectList must contains bitmap_union column, to_bitmap or bitmap_union function's result, column=" + col.getName(); + final String bitmapMismatchLog = "Column's type is BITMAP," + + " SelectList must contains BITMAP column, to_bitmap or bitmap_union" + + " or bitmap_empty function's result, column=" + col.getName(); if (expr instanceof SlotRef) { final SlotRef slot = (SlotRef) expr; Column column = slot.getDesc().getColumn(); @@ -660,8 +661,11 @@ public class InsertStmt extends DdlStmt { } } else if (expr instanceof FunctionCallExpr) { final FunctionCallExpr functionExpr = (FunctionCallExpr) expr; - if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) { - isCompatible = true; // select id, to_bitmap(id2) from table; + // select id, to_bitmap(id2) from table + // select id, bitmap_empty from table + if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP) + || 
functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) { + isCompatible = true; } } diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java index 03ca91eadf..d9964ab1b1 100644 --- a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -513,8 +513,10 @@ public class FunctionSet { public static final String BITMAP_UNION = "bitmap_union"; public static final String BITMAP_UNION_INT = "bitmap_union_int"; public static final String BITMAP_COUNT = "bitmap_count"; + public static final String BITMAP_EMPTY = "bitmap_empty"; public static final String TO_BITMAP = "to_bitmap"; + private static final Map BITMAP_UNION_INT_SYMBOL = ImmutableMap.builder() .put(Type.TINYINT, diff --git a/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java index 29a6dc6252..4caec0fa59 100644 --- a/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/LoadScanNode.java @@ -20,10 +20,14 @@ package org.apache.doris.planner; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ExprSubstitutionMap; +import org.apache.doris.analysis.FunctionCallExpr; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.FunctionSet; import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; import com.google.common.collect.Lists; @@ -72,4 +76,32 @@ public abstract class LoadScanNode extends ScanNode { addConjuncts(whereExpr.getConjuncts()); } + /** + * Verifies that the expression loaded into a BITMAP_UNION column is a to_bitmap or bitmap_empty call. + * Columns with any other aggregation type pass without inspection. + * + * @param slotDesc destination slot; its column's aggregation type and name are read + * @param expr expression assigned to that column + * @throws AnalysisException if the column is BITMAP_UNION and expr is not one of those two calls + */ + protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) 
throws AnalysisException { + boolean isCompatible = true; + if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) { + if (!(expr instanceof FunctionCallExpr)) { + isCompatible = false; + } else { + FunctionCallExpr fn = (FunctionCallExpr) expr; + if (!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP) + && !fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) { + isCompatible = false; + } + } + } + if (!isCompatible) { + throw new AnalysisException("bitmap column must use to_bitmap or bitmap_empty function, like " + + slotDesc.getColumn().getName() + "=to_bitmap(xxx) or " + + slotDesc.getColumn().getName() + "=bitmap_empty()"); + } + } + } diff --git a/fe/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/src/main/java/org/apache/doris/planner/ScanNode.java index 3e9bc86651..75f6ab3fb8 100644 --- a/fe/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/src/main/java/org/apache/doris/planner/ScanNode.java @@ -18,13 +18,9 @@ package org.apache.doris.planner; import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.FunctionCallExpr; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.TupleDescriptor; -import org.apache.doris.catalog.AggregateType; -import org.apache.doris.catalog.FunctionSet; import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.common.AnalysisException; import org.apache.doris.common.UserException; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TScanRangeLocations; @@ -80,24 +76,6 @@ abstract public class ScanNode extends PlanNode { } } - protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException { - boolean isCompatible = true; - if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) { - if (!(expr instanceof FunctionCallExpr)) { - isCompatible = false; - } else { - FunctionCallExpr fn = (FunctionCallExpr) expr; - if 
(!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) { - isCompatible = false; - } - } - } - if (!isCompatible) { - throw new AnalysisException("bitmap_union column must use to_bitmap function, like " - + slotDesc.getColumn().getName() + "=to_bitmap(xxx)"); - } - } - /** * Returns all scan ranges plus their locations. Needs to be preceded by a call to * finalize(). diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index b57add459b..d5e1abc7ad 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -601,6 +601,8 @@ visible_functions = [ '_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE'], [['bitmap_count'], 'BIGINT', ['VARCHAR'], '_ZN5doris15BitmapFunctions12bitmap_countEPN9doris_udf15FunctionContextERKNS1_9StringValE'], + [['bitmap_empty'], 'VARCHAR', [], + '_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE'], # aes and base64 function