Support bitmap_empty function (#2227)

This commit is contained in:
kangkaisen
2019-11-18 20:37:00 +08:00
committed by ZHAO Chun
parent 626001fae4
commit d8cfbbedf7
12 changed files with 67 additions and 35 deletions

View File

@ -30,6 +30,14 @@ void BitmapFunctions::bitmap_init(FunctionContext* ctx, StringVal* dst) {
dst->ptr = (uint8_t*)new RoaringBitmap();
}
// Serializes a default-constructed (element-free) RoaringBitmap into a string buffer
// and returns the result of AnyValUtil::from_string_temp(ctx, buf).
// NOTE(review): assumes RoaringBitmap::size() reports the number of bytes that
// serialize() writes — confirm against RoaringBitmap.
StringVal BitmapFunctions::bitmap_empty(FunctionContext* ctx) {
    RoaringBitmap bitmap;
    std::string buf;
    buf.resize(bitmap.size());
    // Write through the string's own mutable storage. The pointer returned by
    // c_str() is const and must not be written through, so do not cast it away.
    bitmap.serialize(&buf[0]);
    return AnyValUtil::from_string_temp(ctx, buf);
}
template <typename T>
void BitmapFunctions::bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst) {
if (src.is_null) {

View File

@ -26,6 +26,7 @@ class BitmapFunctions {
public:
static void init();
static void bitmap_init(FunctionContext* ctx, StringVal* slot);
static StringVal bitmap_empty(FunctionContext* ctx);
template <typename T>
static void bitmap_update_int(FunctionContext* ctx, const T& src, StringVal* dst);
// the input src's ptr need to point a RoaringBitmap, this function will release the

View File

@ -52,6 +52,15 @@ private:
FunctionContext* ctx;
};
// bitmap_empty() must yield the same StringVal that convert_bitmap_to_string()
// produces for a default-constructed RoaringBitmap.
TEST_F(BitmapFunctionsTest, bitmap_empty) {
    StringVal actual = BitmapFunctions::bitmap_empty(ctx);

    RoaringBitmap empty_bitmap;
    StringVal expected = convert_bitmap_to_string(ctx, empty_bitmap);

    ASSERT_EQ(expected, actual);
}
TEST_F(BitmapFunctionsTest, to_bitmap) {
StringVal input = AnyValUtil::from_string_temp(ctx, std::string("1024"));
StringVal result = BitmapFunctions::to_bitmap(ctx, input);

View File

@ -31,6 +31,8 @@ under the License.
`BITMAP_UNION_INT(expr)` : 计算TINYINT,SMALLINT和INT类型的列中不同值的个数,返回值和
COUNT(DISTINCT expr)相同
`BITMAP_EMPTY()`: 生成空Bitmap列,用于insert或导入时填充默认值
注意:
@ -85,4 +87,4 @@ mysql> select bitmap_count(bitmap_union(id2)) from bitmap_test;
## keyword
BITMAP,BITMAP_COUNT,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP
BITMAP,BITMAP_COUNT,BITMAP_EMPTY,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP

View File

@ -111,14 +111,14 @@ under the License.
6. 使用streaming方式导入(用户是default_cluster中的)
seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load
7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用empty_hll补充数据中没有的列
curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load
7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用hll_empty补充数据中没有的列
curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
8. 导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan
curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load
9. 导入含有聚合模型为BITMAP_UNION列的表,可以是表中的列或者数据中的列用于生成BITMAP_UNION列
curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load
9. 导入含有BITMAP列的表,可以是表中的列或者数据中的列用于生成BITMAP列,也可以使用bitmap_empty填充空的Bitmap
curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load
## keyword

View File

@ -31,6 +31,8 @@ under the License.
`BITMAP_UNION_INT(expr)` : Calculates the number of distinct values in a TINYINT, SMALLINT or INT column. Same as COUNT(DISTINCT expr)
`BITMAP_EMPTY()`: Generates an empty bitmap, used to fill in a default value for a bitmap column on INSERT or data load.
Notice:
1. TO_BITMAP function only receives TINYINT,SMALLINT,INT.

View File

@ -168,17 +168,17 @@ Where url is the url given by ErrorURL.
```seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns,you can also use empty_hll to supplement columns that are not in the data
7. load a table with HLL columns, which can be columns in the table or columns in the data used to generate HLL columns; you can also use hll_empty to supplement columns that are not in the data
```Curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=empty_hll()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
```curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
8. load data for strict mode filtering and set the time zone to Africa/Abidjan
```curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load```
9. load a table with an aggregate model of `BITMAP_UNION`, either a column in the table or a column in the data to generate a `BITMAP_UNION` column
9. load a table with BITMAP columns, which can be columns in the table or columns in the data used to generate BITMAP columns; you can also use bitmap_empty to supplement columns that are not in the data
```Curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1)" -T testData http://host:port/api/testDb/testTbl/_stream_load```
```curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
## keyword

View File

@ -642,8 +642,9 @@ public class InsertStmt extends DdlStmt {
private void checkBitmapCompatibility(Column col, Expr expr) throws AnalysisException {
boolean isCompatible = false;
final String bitmapMismatchLog = "Column's agg type is bitmap_union,"
+ " SelectList must contains bitmap_union column, to_bitmap or bitmap_union function's result, column=" + col.getName();
final String bitmapMismatchLog = "Column's type is BITMAP,"
+ " SelectList must contains BITMAP column, to_bitmap or bitmap_union" +
" or bitmap_empty function's result, column=" + col.getName();
if (expr instanceof SlotRef) {
final SlotRef slot = (SlotRef) expr;
Column column = slot.getDesc().getColumn();
@ -660,8 +661,11 @@ public class InsertStmt extends DdlStmt {
}
} else if (expr instanceof FunctionCallExpr) {
final FunctionCallExpr functionExpr = (FunctionCallExpr) expr;
if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
isCompatible = true; // select id, to_bitmap(id2) from table;
// select id, to_bitmap(id2) from table
// select id, bitmap_empty from table
if (functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)
|| functionExpr.getFnName().getFunction().equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
isCompatible = true;
}
}

View File

@ -513,8 +513,10 @@ public class FunctionSet {
public static final String BITMAP_UNION = "bitmap_union";
public static final String BITMAP_UNION_INT = "bitmap_union_int";
public static final String BITMAP_COUNT = "bitmap_count";
public static final String BITMAP_EMPTY = "bitmap_empty";
public static final String TO_BITMAP = "to_bitmap";
private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
ImmutableMap.<Type, String>builder()
.put(Type.TINYINT,

View File

@ -20,10 +20,14 @@ package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.ExprSubstitutionMap;
import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.AggregateType;
import org.apache.doris.catalog.FunctionSet;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import com.google.common.collect.Lists;
@ -72,4 +76,24 @@ public abstract class LoadScanNode extends ScanNode {
addConjuncts(whereExpr.getConjuncts());
}
/**
 * Verifies that the expression assigned to a BITMAP_UNION-aggregated column is a call
 * to the to_bitmap or bitmap_empty function (function names compared case-insensitively).
 *
 * Columns with any other aggregation type are accepted without inspecting the expression.
 *
 * @param slotDesc descriptor of the destination slot; its column supplies the aggregation
 *                 type and the name used in the error message
 * @param expr     expression whose result is loaded into the column
 * @throws AnalysisException if the column is BITMAP_UNION and expr is not a
 *                           to_bitmap / bitmap_empty function call
 */
protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
    if (slotDesc.getColumn().getAggregationType() != AggregateType.BITMAP_UNION) {
        return;
    }
    if (expr instanceof FunctionCallExpr) {
        String fnName = ((FunctionCallExpr) expr).getFnName().getFunction();
        if (fnName.equalsIgnoreCase(FunctionSet.TO_BITMAP)
                || fnName.equalsIgnoreCase(FunctionSet.BITMAP_EMPTY)) {
            return;
        }
    }
    // Name the real function (bitmap_empty) and separate the two usage examples so the
    // message does not run them together.
    String columnName = slotDesc.getColumn().getName();
    throw new AnalysisException("bitmap column must use to_bitmap or bitmap_empty function, like "
            + columnName + "=to_bitmap(xxx) or " + columnName + "=bitmap_empty()");
}
}

View File

@ -18,13 +18,9 @@
package org.apache.doris.planner;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.AggregateType;
import org.apache.doris.catalog.FunctionSet;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.UserException;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TScanRangeLocations;
@ -80,24 +76,6 @@ abstract public class ScanNode extends PlanNode {
}
}
/**
 * Rejects any expression other than a to_bitmap function call when the destination
 * column uses BITMAP_UNION aggregation. Columns with other aggregation types pass
 * without inspection.
 *
 * @param slotDesc descriptor of the destination slot
 * @param expr     expression whose result is loaded into the column
 * @throws AnalysisException if the column is BITMAP_UNION and expr is not a to_bitmap call
 */
protected void checkBitmapCompatibility(SlotDescriptor slotDesc, Expr expr) throws AnalysisException {
    if (slotDesc.getColumn().getAggregationType() != AggregateType.BITMAP_UNION) {
        return;
    }
    if (expr instanceof FunctionCallExpr) {
        FunctionCallExpr call = (FunctionCallExpr) expr;
        if (call.getFnName().getFunction().equalsIgnoreCase(FunctionSet.TO_BITMAP)) {
            return;
        }
    }
    throw new AnalysisException("bitmap_union column must use to_bitmap function, like "
            + slotDesc.getColumn().getName() + "=to_bitmap(xxx)");
}
/**
* Returns all scan ranges plus their locations. Needs to be preceded by a call to
* finalize().

View File

@ -601,6 +601,8 @@ visible_functions = [
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
[['bitmap_count'], 'BIGINT', ['VARCHAR'],
'_ZN5doris15BitmapFunctions12bitmap_countEPN9doris_udf15FunctionContextERKNS1_9StringValE'],
[['bitmap_empty'], 'VARCHAR', [],
'_ZN5doris15BitmapFunctions12bitmap_emptyEPN9doris_udf15FunctionContextE'],
# aes and base64 function