Support bitmap_empty function (#2227)
This commit is contained in:
@ -30,6 +30,14 @@ void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) {
|
||||
dst->ptr = (uint8_t*)new RoaringBitmap();
|
||||
}
|
||||
|
||||
// Builds the serialized form of a default-constructed (empty) RoaringBitmap and
// returns it as a StringVal obtained from AnyValUtil::from_string_temp.
//
// ctx: function context forwarded to AnyValUtil::from_string_temp.
// Returns: a StringVal produced from the serialized bytes of the empty bitmap.
StringVal BitmapFunctions::bitmap_empty(FunctionContext* ctx) {
    RoaringBitmap bitmap;

    // Size the buffer from bitmap.size() before handing it to serialize().
    std::string buf;
    buf.resize(bitmap.size());

    // Serialize through a mutable pointer into the string's storage. Writing
    // through the pointer returned by c_str() is undefined behavior, so use
    // &buf[0], which is valid even when the string is empty.
    bitmap.serialize(&buf[0]);

    return AnyValUtil::from_string_temp(ctx, buf);
}
|
||||
|
||||
template <typename T>
|
||||
void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst) {
|
||||
if (src.is_null) {
|
||||
|
||||
@ -26,6 +26,7 @@ class BitmapFunctions {
|
||||
public:
|
||||
static void init();
|
||||
static void bitmap_init(FunctionContext* ctx, StringVal* slot);
|
||||
static StringVal bitmap_empty(FunctionContext* ctx);
|
||||
template <typename T>
|
||||
static void bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst);
|
||||
// the input src's ptr need to point a RoaringBitmap, this function will release the
|
||||
|
||||
@ -52,6 +52,15 @@ private:
|
||||
FunctionContext* ctx;
|
||||
};
|
||||
|
||||
// bitmap_empty() must yield the same StringVal as converting a
// default-constructed RoaringBitmap with convert_bitmap_to_string.
TEST_F(BitmapFunctionsTest, bitmap_empty) {
    StringVal actual = BitmapFunctions::bitmap_empty(ctx);

    RoaringBitmap empty_bitmap;
    StringVal reference = convert_bitmap_to_string(ctx, empty_bitmap);

    ASSERT_EQ(reference, actual);
}
|
||||
|
||||
TEST_F(BitmapFunctionsTest, to_bitmap) {
|
||||
StringVal input = AnyValUtil::from_string_temp(ctx, std::string("1024"));
|
||||
StringVal result = BitmapFunctions::to_bitmap(ctx, input);
|
||||
|
||||
@ -31,6 +31,8 @@ under the License.
|
||||
`BITMAP_UNION_INT(expr)` : 计算TINYINT,SMALLINT和INT类型的列中不同值的个数,返回值和
|
||||
COUNT(DISTINCT expr)相同
|
||||
|
||||
`BITMAP_EMPTY()`: 生成空Bitmap列,用于insert或导入时填充默认值
|
||||
|
||||
|
||||
注意:
|
||||
|
||||
@ -85,4 +87,4 @@ mysql> select bitmap_count(bitmap_union(id2)) from bitmap_test;
|
||||
|
||||
## keyword
|
||||
|
||||
BITMAP,BITMAP_COUNT,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
|
||||
BITMAP,BITMAP_COUNT,BITMAP_EMPTY,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
|
||||
|
||||
@ -111,14 +111,14 @@ under the License.
|
||||
6. 使用streaming方式导入(用户是default_cluster中的)
|
||||
seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load
|
||||
|
||||
7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用empty_hll补充数据中没有的列
|
||||
curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load
|
||||
7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用hll_empty补充数据中没有的列
|
||||
curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
|
||||
|
||||
8. 导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan
|
||||
curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load
|
||||
|
||||
9. 导入含有聚合模型为BITMAP_UNION列的表,可以是表中的列或者数据中的列用于生成BITMAP_UNION列
|
||||
curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load
|
||||
9. 导入含有BITMAP列的表,可以是表中的列或者数据中的列用于生成BITMAP列,也可以使用bitmap_empty填充空的Bitmap
|
||||
curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
|
||||
|
||||
|
||||
## keyword
|
||||
|
||||
@ -31,6 +31,8 @@ under the License.
|
||||
|
||||
`BITMAP_UNION_INT(expr)` : Calculate the distinct value number of TINYINT,SMALLINT and INT type column. Same as COUNT(DISTINCT expr)
|
||||
|
||||
`BITMAP_EMPTY()`: Generates an empty bitmap column, used to fill in default values during INSERT or data loading.
|
||||
|
||||
Notice:
|
||||
|
||||
1. TO_BITMAP function only receives TINYINT,SMALLINT,INT.
|
||||
|
||||
@ -168,17 +168,17 @@ Where url is the url given by ErrorURL.
|
||||
|
||||
```seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
|
||||
|
||||
7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use empty_hll to supplement columns that are not in the data
|
||||
7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns, you can also use hll_empty to supplement columns that are not in the data
|
||||
|
||||
```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
|
||||
```curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
|
||||
|
||||
8. load data for strict mode filtering and set the time zone to Africa/Abidjan
|
||||
|
||||
```curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load```
|
||||
|
||||
9. load a table with an aggregate model of `BITMAP_UNION`, either a column in the table or a column in the data to generate a `BITMAP_UNION` column
|
||||
9. load a table with BITMAP columns, which can be columns in the table or a column in the data used to generate BITMAP columns, you can also use bitmap_empty to supplement columns that are not in the data
|
||||
|
||||
```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load```
|
||||
```curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
|
||||
|
||||
|
||||
## keyword
|
||||
|
||||
@ -642,8 +642,9 @@ public class InsertStmt extends DdlStmt {
|
||||
|
||||
private void checkBitmapCompatibility(Column col, Expr expr) throws AnalysisException {
|
||||
boolean isCompatible = false;
|
||||
final String bitmapMismatchLog = "Column's agg type is bitmap_union,"
|
||||
+ " SelectList must contains bitmap_union column, to_bitmap or bitmap_union function's result, column=" + col.getName();
|
||||
final String bitmapMismatchLog = "Column's type is BITMAP,"
|
||||
+ " SelectList must contains BITMAP column, to_bitmap or bitmap_union" +
|
||||
" or bitmap_empty function's result, column=" + col.getName();
|
||||
if (expr instanceof SlotRef) {
|
||||
final SlotRef slot = (SlotRef) expr;
|
||||
Column column = slot.getDesc().getColumn();
|
||||
@ -660,8 +661,11 @@ public class InsertStmt extends DdlStmt {
|
||||
}
|
||||
} else if (expr instanceof FunctionCallExpr) {
|
||||
final FunctionCallExpr functionExpr = (FunctionCallExpr) expr;
|
||||
if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
|
||||
isCompatible = true; // select id, to_bitmap(id2) from table;
|
||||
// select id, to_bitmap(id2) from table
|
||||
// select id, bitmap_empty from table
|
||||
if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
|
||||
|| functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
|
||||
isCompatible = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -513,8 +513,10 @@ public class FunctionSet {
|
||||
public static final String BITMAP_UNION = "bitmap_union";
|
||||
public static final String BITMAP_UNION_INT = "bitmap_union_int";
|
||||
public static final String BITMAP_COUNT = "bitmap_count";
|
||||
public static final String BITMAP_EMPTY = "bitmap_empty";
|
||||
public static final String TO_BITMAP = "to_bitmap";
|
||||
|
||||
|
||||
private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
|
||||
ImmutableMap.<Type, String>builder()
|
||||
.put(Type.TINYINT,
|
||||
|
||||
@ -20,10 +20,14 @@ package org.apache.doris.planner;
|
||||
import org.apache.doris.analysis.Analyzer;
|
||||
import org.apache.doris.analysis.Expr;
|
||||
import org.apache.doris.analysis.ExprSubstitutionMap;
|
||||
import org.apache.doris.analysis.FunctionCallExpr;
|
||||
import org.apache.doris.analysis.SlotDescriptor;
|
||||
import org.apache.doris.analysis.SlotRef;
|
||||
import org.apache.doris.analysis.TupleDescriptor;
|
||||
import org.apache.doris.catalog.AggregateType;
|
||||
import org.apache.doris.catalog.FunctionSet;
|
||||
import org.apache.doris.catalog.Type;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.UserException;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
@ -72,4 +76,24 @@ public abstract class LoadScanNode extends ScanNode {
|
||||
addConjuncts(whereExpr.getConjuncts());
|
||||
}
|
||||
|
||||
protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
|
||||
boolean isCompatible = true;
|
||||
if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) {
|
||||
if (!(expr instanceof FunctionCallExpr)) {
|
||||
isCompatible = false;
|
||||
} else {
|
||||
FunctionCallExpr fn = (FunctionCallExpr) expr;
|
||||
if (!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
|
||||
&& !fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
|
||||
isCompatible = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!isCompatible) {
|
||||
throw new AnalysisException("bitmap column must use to_bitmap or empty_bitmap function, like "
|
||||
+ slotDesc.getColumn().getName() + "=to_bitmap(xxx)"
|
||||
+ slotDesc.getColumn().getName() + "=bitmap_empty()");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -18,13 +18,9 @@
|
||||
package org.apache.doris.planner;
|
||||
|
||||
import org.apache.doris.analysis.Expr;
|
||||
import org.apache.doris.analysis.FunctionCallExpr;
|
||||
import org.apache.doris.analysis.SlotDescriptor;
|
||||
import org.apache.doris.analysis.TupleDescriptor;
|
||||
import org.apache.doris.catalog.AggregateType;
|
||||
import org.apache.doris.catalog.FunctionSet;
|
||||
import org.apache.doris.catalog.PrimitiveType;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.common.UserException;
|
||||
import org.apache.doris.thrift.TNetworkAddress;
|
||||
import org.apache.doris.thrift.TScanRangeLocations;
|
||||
@ -80,24 +76,6 @@ abstract public class ScanNode extends PlanNode {
|
||||
}
|
||||
}
|
||||
|
||||
protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
|
||||
boolean isCompatible = true;
|
||||
if (slotDesc.getColumn().getAggregationType() == AggregateType.BITMAP_UNION) {
|
||||
if (!(expr instanceof FunctionCallExpr)) {
|
||||
isCompatible = false;
|
||||
} else {
|
||||
FunctionCallExpr fn = (FunctionCallExpr) expr;
|
||||
if (!fn.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
|
||||
isCompatible = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!isCompatible) {
|
||||
throw new AnalysisException("bitmap_union column must use to_bitmap function, like "
|
||||
+ slotDesc.getColumn().getName() + "=to_bitmap(xxx)");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all scan ranges plus their locations. Needs to be preceded by a call to
|
||||
* finalize().
|
||||
|
||||
@ -601,6 +601,8 @@ visible_functions = [
|
||||
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
|
||||
[['bitmap_count'], 'BIGINT', ['VARCHAR'],
|
||||
'_ZN5doris15BitmapFunctions12bitmap_countEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
|
||||
[['bitmap_empty'], 'VARCHAR', [],
|
||||
'_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE'],
|
||||
|
||||
|
||||
# aes and base64 function
|
||||
|
||||
Reference in New Issue
Block a user