[fix](multi-catalog) Fix MaxCompute ARRAY and MAP type read offset (#39822)

Backport of #39680.
Author: slothever
Date: 2024-08-23 16:53:52 +08:00
Committed by: GitHub
Parent: e0b65d0ff4
Commit: 67a8099991
3 changed files with 234 additions and 5 deletions
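
What the fix changes: for complex types, MaxComputeColumnValue now keeps a per-column running offset into the Arrow child data vectors. An Arrow ListVector stores the elements of every row in one flat child vector, so element i of a given row must be read at its cumulative position in that child vector, not at i; the same holds for the key/value children of a MapVector. The sketch below is illustrative only and not part of the patch (the class name ListOffsetSketch is made up); it shows that read pattern with plain Arrow Java APIs.

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

// Illustrative sketch only (not part of the patch): an ARRAY column as an Arrow
// ListVector. The child data vector is one flat vector shared by every row, so a
// reader has to keep a cumulative offset across rows, which is what the fixed
// unpackArray below does, instead of restarting at element 0 for each row.
public class ListOffsetSketch {
    public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
             ListVector list = ListVector.empty("arr1", allocator)) {
            UnionListWriter writer = list.getWriter();
            writer.allocate();
            // row 0 -> [10, 20], row 1 -> [30, 40, 50]
            writer.setPosition(0);
            writer.startList();
            writer.bigInt().writeBigInt(10);
            writer.bigInt().writeBigInt(20);
            writer.endList();
            writer.setPosition(1);
            writer.startList();
            writer.bigInt().writeBigInt(30);
            writer.bigInt().writeBigInt(40);
            writer.bigInt().writeBigInt(50);
            writer.endList();
            list.setValueCount(2);
            // The flat child vector now holds [10, 20, 30, 40, 50].
            BigIntVector data = (BigIntVector) list.getDataVector();
            int offset = 0; // cumulative, like the offset field added below
            for (int row = 0; row < 2; row++) {
                int elemSize = list.getObject(row).size();
                for (int i = 0; i < elemSize; i++) {
                    // data.get(i) would re-read row 0's elements for every row;
                    // data.get(offset) reads each row's own elements.
                    System.out.println("row " + row + ": " + data.get(offset));
                    offset++;
                }
            }
        }
    }
}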


@@ -32,8 +32,12 @@ import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TimeStampNanoVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.log4j.Logger;
import java.math.BigDecimal;
@@ -49,15 +53,22 @@ import java.util.List;
public class MaxComputeColumnValue implements ColumnValue {
    private static final Logger LOG = Logger.getLogger(MaxComputeColumnValue.class);
    private int idx;
    private FieldVector column;
    private int offset = 0; // for complex type
    private ValueVector column;
    public MaxComputeColumnValue() {
        idx = 0;
    }
    public void reset(FieldVector column) {
    public MaxComputeColumnValue(ValueVector valueVector, int i) {
        this.column = valueVector;
        this.idx = i;
    }
    public void reset(ValueVector column) {
        this.column = column;
        this.idx = 0;
        this.offset = 0;
    }
    @Override
@@ -222,16 +233,42 @@ public class MaxComputeColumnValue implements ColumnValue {
    @Override
    public void unpackArray(List<ColumnValue> values) {
        skippedIfNull();
        ListVector listCol = (ListVector) column;
        int elemSize = listCol.getObject(idx).size();
        for (int i = 0; i < elemSize; i++) {
            MaxComputeColumnValue val = new MaxComputeColumnValue(listCol.getDataVector(), offset);
            values.add(val);
            offset++;
        }
        idx++;
    }
    @Override
    public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
        skippedIfNull();
        MapVector mapCol = (MapVector) column;
        int elemSize = mapCol.getObject(idx).size();
        FieldVector keyList = mapCol.getDataVector().getChildrenFromFields().get(0);
        FieldVector valList = mapCol.getDataVector().getChildrenFromFields().get(1);
        for (int i = 0; i < elemSize; i++) {
            MaxComputeColumnValue key = new MaxComputeColumnValue(keyList, offset);
            keys.add(key);
            MaxComputeColumnValue val = new MaxComputeColumnValue(valList, offset);
            values.add(val);
            offset++;
        }
        idx++;
    }
    @Override
    public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
        skippedIfNull();
        StructVector structCol = (StructVector) column;
        for (Integer fieldIndex : structFieldIndex) {
            MaxComputeColumnValue val = new MaxComputeColumnValue(structCol.getChildByOrdinal(fieldIndex), idx);
            values.add(val);
        }
        idx++;
    }
}
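
The same running offset drives unpackMap above: MapVector.getDataVector() is the flat entries struct, whose first child holds all keys and whose second child holds all values for the column, so both are indexed with the cumulative offset instead of the per-row entry index. Another illustrative sketch under the same caveat (not part of the patch; the class name MapOffsetSketch is made up):

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.impl.UnionMapWriter;

// Illustrative sketch only (not part of the patch): a MAP<BIGINT, DOUBLE> column
// as an Arrow MapVector. getDataVector() is the flat entries struct; child 0 holds
// every key and child 1 every value of the whole column, so both are read with the
// same cumulative offset that unpackMap above maintains.
public class MapOffsetSketch {
    public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
             MapVector map = MapVector.empty("arr1", allocator, false)) {
            UnionMapWriter writer = map.getWriter();
            writer.allocate();
            // row 0 -> {1: 2.5, 2: 3.75}, row 1 -> {3: 2.5}
            writer.setPosition(0);
            writer.startMap();
            writer.startEntry();
            writer.key().bigInt().writeBigInt(1);
            writer.value().float8().writeFloat8(2.5);
            writer.endEntry();
            writer.startEntry();
            writer.key().bigInt().writeBigInt(2);
            writer.value().float8().writeFloat8(3.75);
            writer.endEntry();
            writer.endMap();
            writer.setPosition(1);
            writer.startMap();
            writer.startEntry();
            writer.key().bigInt().writeBigInt(3);
            writer.value().float8().writeFloat8(2.5);
            writer.endEntry();
            writer.endMap();
            map.setValueCount(2);
            FieldVector keyList = map.getDataVector().getChildrenFromFields().get(0);
            FieldVector valList = map.getDataVector().getChildrenFromFields().get(1);
            int offset = 0; // shared by keys and values, never reset per row
            for (int row = 0; row < 2; row++) {
                int elemSize = map.getObject(row).size();
                for (int i = 0; i < elemSize; i++) {
                    long k = ((BigIntVector) keyList).get(offset);
                    double v = ((Float8Vector) valList).get(offset);
                    System.out.println("row " + row + ": " + k + " -> " + v);
                    offset++;
                }
            }
        }
    }
}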


@@ -0,0 +1,17 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !mc_q1 --
3 [1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
2 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
1 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
-- !mc_q2 --
{1:"example1", 2:"example2"} {1:2.5, 2:3.75}
{349:"asd", 324:"uid"} {3:2.5, 99:3.75}
-- !mc_q3 --
{"phone_number":123450, "email":"user1@example.com", "addr":"Addr1"} {"id":"user1", "age":25}
{"phone_number":2345671, "email":"user2@example.com", "addr":"Addr2"} {"id":"user2", "age":30}
-- !mc_q4 --
user1 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, {"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}]
user2 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, {"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}]


@@ -0,0 +1,175 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
/*
// Test Case DDL
create table array_table (
    id int,
    arr1 ARRAY<BIGINT>,
    arr2 ARRAY<VARCHAR(10)>,
    arr3 ARRAY<DOUBLE>,
    arr4 ARRAY<DATE>,
    arr5 ARRAY<DATETIME>
);
INSERT INTO array_table VALUES(1, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
INSERT INTO array_table VALUES(2, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
INSERT INTO array_table VALUES(3, array(1, 2, 3), array('a', 'b', 'c'), array(1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
create table map_table (
    arr1 MAP<BIGINT, DOUBLE>,
    arr2 MAP<BIGINT, STRING>
);
INSERT INTO map_table (arr1, arr2)
VALUES (
    MAP(1, 2.5, 2, 3.75),
    MAP(1, 'example1', 2, 'example2')
);
INSERT INTO map_table (arr1, arr2)
VALUES (
    MAP(3, 2.5, 99, 3.75),
    MAP(349, 'asd', 324, 'uid')
);
create table struct_table (
    user_info STRUCT<id: STRING, age: INT>,
    contact_info STRUCT<phone_number: BIGINT, email: STRING, addr: VARCHAR(10)>
);
INSERT INTO struct_table VALUES
(
    named_struct('id', 'user1', 'age', 25),
    named_struct('phone_number', 123450, 'email', 'user1@example.com', 'addr', 'Addr1')
),
(
    named_struct('id', 'user2', 'age', 30),
    named_struct('phone_number', 2345671, 'email', 'user2@example.com', 'addr', 'Addr2')
),
(
    named_struct('id', 'user3', 'age', 35),
    named_struct('phone_number', 3456789, 'email', 'user3@example.com', 'addr', 'Addr3')
);
CREATE TABLE nested_complex_table (
    user_id STRING,
    user_profile STRUCT<
        name: STRING,
        age: INT,
        preferences: MAP<
            STRING,
            STRUCT<
                preference_id: INT,
                preference_values: ARRAY<STRING>
            >
        >
    >,
    activity_log ARRAY<
        STRUCT<
            activity_date: STRING,
            activities: MAP<
                STRING,
                STRUCT<
                    details: STRING,
                    metrics: MAP<STRING, DOUBLE>
                >
            >
        >
    >
);
INSERT INTO nested_complex_table VALUES
(
    'user1',
    named_struct('name', 'Alice', 'age', 28, 'preferences', map(
        'sports', named_struct('preference_id', 101, 'preference_values', array('soccer', 'tennis')),
        'music', named_struct('preference_id', 102, 'preference_values', array('rock', 'classical'))
    )),
    array(
        named_struct('activity_date', '2024-08-01', 'activities', map(
            'workout', named_struct('details', 'Morning run', 'metrics', map('duration', 30.5, 'calories', 200.0)),
            'reading', named_struct('details', 'Read book on Hive', 'metrics', map('pages', 50.0, 'time', 2.0))
        )),
        named_struct('activity_date', '2024-08-02', 'activities', map(
            'travel', named_struct('details', 'Flight to NY', 'metrics', map('distance', 500.0, 'time', 3.0)),
            'meeting', named_struct('details', 'Project meeting', 'metrics', map('duration', 1.5, 'participants', 5.0))
        ))
    )
),
(
    'user2',
    named_struct('name', 'Bob', 'age', 32, 'preferences', map(
        'books', named_struct('preference_id', 201, 'preference_values', array('fiction', 'non-fiction')),
        'travel', named_struct('preference_id', 202, 'preference_values', array('beaches', 'mountains'))
    )),
    array(
        named_struct('activity_date', '2024-08-01', 'activities', map(
            'hiking', named_struct('details', 'Mountain trail', 'metrics', map('distance', 10.0, 'elevation', 500.0)),
            'photography', named_struct('details', 'Wildlife photoshoot', 'metrics', map('photos_taken', 100.0, 'time', 4.0))
        )),
        named_struct('activity_date', '2024-08-02', 'activities', map(
            'workshop', named_struct('details', 'Photography workshop', 'metrics', map('duration', 3.0, 'participants', 15.0)),
            'shopping', named_struct('details', 'Bought camera gear', 'metrics', map('items', 5.0, 'cost', 1500.0))
        ))
    )
),
(
    'user3',
    named_struct('name', 'Carol', 'age', 24, 'preferences', map(
        'food', named_struct('preference_id', 301, 'preference_values', array('vegan', 'desserts')),
        'movies', named_struct('preference_id', 302, 'preference_values', array('action', 'comedy'))
    )),
    array(
        named_struct('activity_date', '2024-08-01', 'activities', map(
            'cooking', named_struct('details', 'Made vegan meal', 'metrics', map('time_spent', 1.5, 'calories', 500.0)),
            'movie', named_struct('details', 'Watched action movie', 'metrics', map('duration', 2.0, 'rating', 8.5))
        )),
        named_struct('activity_date', '2024-08-02', 'activities', map(
            'gym', named_struct('details', 'Strength training', 'metrics', map('duration', 1.0, 'calories', 300.0)),
            'shopping', named_struct('details', 'Bought groceries', 'metrics', map('items', 10.0, 'cost', 100.0))
        ))
    )
);
*/
suite("test_max_compute_complex_type", "p0,external,doris,external_docker,external_docker_doris") {
String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String ak = context.config.otherConfigs.get("aliYunAk")
String sk = context.config.otherConfigs.get("aliYunSk")
String mc_catalog_name = "test_max_compute_complex_type"
sql """drop catalog if exists ${mc_catalog_name} """
sql """
CREATE CATALOG IF NOT EXISTS ${mc_catalog_name} PROPERTIES (
"type" = "max_compute",
"mc.default.project" = "mc_datalake",
"mc.region" = "cn-beijing",
"mc.access_key" = "${ak}",
"mc.secret_key" = "${sk}",
"mc.public_access" = "true"
);
"""
logger.info("catalog " + mc_catalog_name + " created")
sql """switch ${mc_catalog_name};"""
logger.info("switched to catalog " + mc_catalog_name)
sql """ use mc_datalake """
qt_mc_q1 """ select id,arr3,arr1,arr5,arr2 from array_table order by id desc """
qt_mc_q2 """ select arr2,arr1 from map_table order by id limit 2 """
qt_mc_q3 """ select contact_info,user_info from struct_table order by id limit 2 """
qt_mc_q4 """ select user_id,activity_log from nested_complex_table order by user_id limit 2 """
sql """drop catalog ${mc_catalog_name};"""
}
}