diff --git a/docs/en/docs/advanced/variables.md b/docs/en/docs/advanced/variables.md index 9af59255da..86e193abaa 100644 --- a/docs/en/docs/advanced/variables.md +++ b/docs/en/docs/advanced/variables.md @@ -486,7 +486,33 @@ Translated with www.DeepL.com/Translator (free version) Used to control whether to perform predicate derivation. There are two values: true and false. It is turned off by default, that is, the system does not perform predicate derivation, and uses the original predicate to perform related operations. After it is set to true, predicate expansion is performed. * `return_object_data_as_binary` - Used to identify whether to return the bitmap/hll result in the select result. In the select into outfile statement, if the export file format is csv, the bimap/hll data will be base64-encoded, if it is the parquet file format, the data will be stored as a byte array + Used to identify whether to return the bitmap/hll result in the select result. In the select into outfile statement, if the export file format is csv, the bitmap/hll data will be base64-encoded; if it is the parquet file format, the data will be stored as a byte array. The following is a Java example; more examples can be found in [samples](https://github.com/apache/doris/tree/master/samples/read_bitmap). + + ```java + try (Connection conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:9030/test?user=root"); + Statement stmt = conn.createStatement() + ) { + stmt.execute("set return_object_data_as_binary=true"); // IMPORTANT!!! 
+ ResultSet rs = stmt.executeQuery("select uids from t_bitmap"); + while(rs.next()){ + byte[] bytes = rs.getBytes(1); + RoaringBitmap bitmap32 = new RoaringBitmap(); + switch(bytes[0]) { + case 0: // for empty bitmap + break; + case 1: // for only 1 element in bitmap32 + bitmap32.add(ByteBuffer.wrap(bytes,1,bytes.length-1) + .order(ByteOrder.LITTLE_ENDIAN) + .getInt()); + break; + case 2: // for more than 1 elements in bitmap32 + bitmap32.deserialize(ByteBuffer.wrap(bytes,1,bytes.length-1)); + break; + // for more details, see https://github.com/apache/doris/tree/master/samples/read_bitmap + } + } + } + ``` * `block_encryption_mode` The block_encryption_mode variable controls the block encryption mode. The default setting is empty, when use AES equal to `AES_128_ECB`, when use SM4 equal to `SM3_128_ECB` @@ -641,4 +667,4 @@ Translated with www.DeepL.com/Translator (free version) * Precautions - The timeout set by `user property` needs to be triggered after the client reconnects. \ No newline at end of file + The timeout set by `user property` needs to be triggered after the client reconnects. 
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md index c66b29424a..a96026ac60 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md @@ -37,12 +37,36 @@ Note: If BITMAP does not use a global dictionary in real-time scenarios, using b ### example +Create table example: + + create table metric_table ( + datekey int, + hour int, + device_id bitmap BITMAP_UNION + ) + aggregate key (datekey, hour) + distributed by hash(datekey, hour) buckets 1 + properties( + "replication_num" = "1" + ); + +Insert data example: + + insert into metric_table values + (20200622, 1, to_bitmap(243)), + (20200622, 2, bitmap_from_array([1,2,3,4,5,434543])), + (20200622, 3, to_bitmap(287667876573)); + +Query data example: + select hour, BITMAP_UNION_COUNT(pv) over(order by hour) uv from( select hour, BITMAP_UNION(device_id) as pv from metric_table -- Query the accumulated UV per hour - where datekey=20200922 + where datekey=20200622 group by hour order by 1 ) final; + +When querying, BITMAP can be used together with the `return_object_data_as_binary` variable. For details, please refer to [variables](../../../advanced/variables.md). 
### keywords BITMAP diff --git a/docs/zh-CN/docs/advanced/variables.md b/docs/zh-CN/docs/advanced/variables.md index 2913435a73..3ff5e55a17 100644 --- a/docs/zh-CN/docs/advanced/variables.md +++ b/docs/zh-CN/docs/advanced/variables.md @@ -475,7 +475,33 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3); 用于关闭所有系统自动的 join reorder 算法。取值有两种:true 和 false。默认行况下关闭,也就是采用系统自动的 join reorder 算法。设置为 true 后,系统会关闭所有自动排序的算法,采用 SQL 原始的表顺序,执行 join -- `return_object_data_as_binary` 用于标识是否在select 结果中返回bitmap/hll 结果。在 select into outfile 语句中,如果导出文件格式为csv 则会将 bimap/hll 数据进行base64编码,如果是parquet 文件格式 将会把数据作为byte array 存储 +- `return_object_data_as_binary` 用于标识是否在select 结果中返回bitmap/hll 结果。在 select into outfile 语句中,如果导出文件格式为csv 则会将 bitmap/hll 数据进行base64编码,如果是parquet 文件格式 将会把数据作为byte array 存储。下面将展示 Java 的例子,更多的示例可查看[samples](https://github.com/apache/doris/tree/master/samples/read_bitmap)。 + +```java +try (Connection conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:9030/test?user=root"); + Statement stmt = conn.createStatement() +) { + stmt.execute("set return_object_data_as_binary=true"); // IMPORTANT!!! 
+ ResultSet rs = stmt.executeQuery("select uids from t_bitmap"); + while(rs.next()){ + byte[] bytes = rs.getBytes(1); + RoaringBitmap bitmap32 = new RoaringBitmap(); + switch(bytes[0]) { + case 0: // for empty bitmap + break; + case 1: // for only 1 element in bitmap32 + bitmap32.add(ByteBuffer.wrap(bytes,1,bytes.length-1) + .order(ByteOrder.LITTLE_ENDIAN) + .getInt()); + break; + case 2: // for more than 1 elements in bitmap32 + bitmap32.deserialize(ByteBuffer.wrap(bytes,1,bytes.length-1)); + break; + // for more details, see https://github.com/apache/doris/tree/master/samples/read_bitmap + } + } +} +``` - `block_encryption_mode` 可以通过block_encryption_mode参数,控制块加密模式,默认值为:空。当使用AES算法加密时相当于`AES_128_ECB`, 当时用SM3算法加密时相当于`SM3_128_ECB` 可选值: diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md index f469615ab1..02604de472 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md @@ -36,6 +36,28 @@ under the License. ### example +建表示例如下: + + create table metric_table ( + datekey int, + hour int, + device_id bitmap BITMAP_UNION + ) + aggregate key (datekey, hour) + distributed by hash(datekey, hour) buckets 1 + properties( + "replication_num" = "1" + ); + +插入数据示例: + + insert into metric_table values + (20200622, 1, to_bitmap(243)), + (20200622, 2, bitmap_from_array([1,2,3,4,5,434543])), + (20200622, 3, to_bitmap(287667876573)); + +查询数据示例: + select hour, BITMAP_UNION_COUNT(pv) over(order by hour) uv from( select hour, BITMAP_UNION(device_id) as pv from metric_table -- 查询每小时的累计UV @@ -43,6 +65,8 @@ under the License. 
group by hour order by 1 ) final; +在查询时,BITMAP 可配合`return_object_data_as_binary`变量进行使用,详情可查看[变量](../../../advanced/variables.md)章节。 + ### keywords BITMAP diff --git a/samples/read_bitmap/java/ReadBitmap.java b/samples/read_bitmap/java/ReadBitmap.java new file mode 100644 index 0000000000..b94a3ca9c8 --- /dev/null +++ b/samples/read_bitmap/java/ReadBitmap.java @@ -0,0 +1,145 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.longlong.Roaring64NavigableMap; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.Statement; + +/** + * Prepare the table and data in Doris: + * + *
+ * {@code
+ * CREATE TABLE `t_bitmap` (
+ *   `rn` int(11) NULL,
+ *   `uids` bitmap BITMAP_UNION NULL
+ * ) AGGREGATE KEY(`rn`)
+ * DISTRIBUTED BY HASH(`rn`) BUCKETS 1
+ * PROPERTIES (
+ *   "replication_num" = "1"
+ * );
+ *
+ * INSERT INTO t_bitmap VALUES
+ * (0, bitmap_empty()),
+ * (1, to_bitmap(243)),
+ * (2, bitmap_from_array([1,2,3,4,5,434543])),
+ * (3, to_bitmap(287667876573)),
+ * (4, bitmap_from_array([487667876573, 387627876573, 987567876573, 187667876573]));
+ * }
+ * 
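+ * + * The pom.xml dependency: + * <pre>
+ * {@code
+ * <dependency>
+ *    <groupId>mysql</groupId>
+ *    <artifactId>mysql-connector-java</artifactId>
+ *    <version>8.0.28</version>
+ * </dependency>
+ * <dependency>
+ *    <groupId>org.roaringbitmap</groupId>
+ *    <artifactId>RoaringBitmap</artifactId>
+ *    <version>0.9.39</version>
+ * </dependency>
+ * }
+ * </pre>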
+ */ +public class ReadBitmap { + public static void main(String[] args) throws Exception { + + try (Connection conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:9030/test?user=root"); + Statement stmt = conn.createStatement() + ) { + stmt.execute("set return_object_data_as_binary=true"); // IMPORTANT!!! + ResultSet rs = stmt.executeQuery("select uids from t_bitmap"); + while (rs.next()) { + byte[] bytes = rs.getBytes(1); + + RoaringBitmap bitmap32 = new RoaringBitmap(); + // Only Roaring64NavigableMap can work, Roaring64Bitmap can't work!!! + Roaring64NavigableMap bitmap64 = new Roaring64NavigableMap(); + switch (bytes[0]) { + case 0: // for empty bitmap + break; + case 1: // for only 1 element in bitmap32 + bitmap32.add(ByteBuffer.wrap(bytes, 1, bytes.length - 1) + .order(ByteOrder.LITTLE_ENDIAN) + .getInt()); + break; + case 2: // for more than 1 elements in bitmap32 + bitmap32.deserialize(ByteBuffer.wrap(bytes, 1, bytes.length - 1)); + break; + case 3: // for only 1 element in bitmap64 + bitmap64.add(ByteBuffer.wrap(bytes, 1, bytes.length - 1) + .order(ByteOrder.LITTLE_ENDIAN) + .getLong()); + break; + case 4: // for more than 1 elements in bitmap64 + Object[] tuple2 = decodeVarint64(bytes); + int offset = (int) tuple2[1]; + int newLen = 8 + bytes.length - offset; + + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(newLen); + DataOutputStream dos = new DataOutputStream(baos)) { + dos.write((byte[]) tuple2[0]); + dos.write(bytes, offset, bytes.length - offset); + dos.flush(); + try (DataInputStream dis = new DataInputStream( + new ByteArrayInputStream(baos.toByteArray()))) { + bitmap64.deserializePortable(dis); + } + } + break; + } + System.out.println(bytes[0] <= 2 ? 
bitmap32 : bitmap64); + } + } + } + + static Object[] decodeVarint64(byte[] bt) { // nolint + long result = 0; + int shift = 0; + short B = 128; + int idx = 1; + for (; ; ) { + short readByte = bt[idx]; + idx++; + boolean isEnd = (readByte & B) == 0; + result |= (long) (readByte & (B - 1)) << (shift * 7); + if (isEnd) { + break; + } + shift++; + } + byte[] bytes = new byte[8]; + for (int i = 0; i < bytes.length; i++) { + // LITTLE_ENDIAN + bytes[i] = (byte) (result >> 8 * i); + } + return new Object[]{bytes, idx}; + } +}