[fix](multi-catalog)fix max compute array and map type read offset (#39822)
bp #39680
This commit is contained in:
@ -32,8 +32,12 @@ import org.apache.arrow.vector.IntVector;
|
||||
import org.apache.arrow.vector.SmallIntVector;
|
||||
import org.apache.arrow.vector.TimeStampNanoVector;
|
||||
import org.apache.arrow.vector.TinyIntVector;
|
||||
import org.apache.arrow.vector.ValueVector;
|
||||
import org.apache.arrow.vector.VarBinaryVector;
|
||||
import org.apache.arrow.vector.VarCharVector;
|
||||
import org.apache.arrow.vector.complex.ListVector;
|
||||
import org.apache.arrow.vector.complex.MapVector;
|
||||
import org.apache.arrow.vector.complex.StructVector;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
@ -49,15 +53,22 @@ import java.util.List;
|
||||
public class MaxComputeColumnValue implements ColumnValue {
|
||||
private static final Logger LOG = Logger.getLogger(MaxComputeColumnValue.class);
|
||||
private int idx;
|
||||
private FieldVector column;
|
||||
private int offset = 0; // for complex type
|
||||
private ValueVector column;
|
||||
|
||||
/**
 * Creates an empty reader; {@code reset(ValueVector)} must be called to
 * attach a column before any value is read. The row index starts at 0.
 */
public MaxComputeColumnValue() {
    this.idx = 0;
}
|
||||
|
||||
public void reset(FieldVector column) {
|
||||
/**
 * Wraps one cell: the value at row {@code i} of {@code valueVector}.
 * Used when unpacking complex types (array/map/struct) into one
 * ColumnValue per element.
 */
public MaxComputeColumnValue(ValueVector valueVector, int i) {
    this.idx = i;
    this.column = valueVector;
}
|
||||
|
||||
public void reset(ValueVector column) {
|
||||
this.column = column;
|
||||
this.idx = 0;
|
||||
this.offset = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -222,16 +233,42 @@ public class MaxComputeColumnValue implements ColumnValue {
|
||||
|
||||
@Override
|
||||
public void unpackArray(List<ColumnValue> values) {
|
||||
|
||||
skippedIfNull();
|
||||
ListVector listCol = (ListVector) column;
|
||||
int elemSize = listCol.getObject(idx).size();
|
||||
for (int i = 0; i < elemSize; i++) {
|
||||
MaxComputeColumnValue val = new MaxComputeColumnValue(listCol.getDataVector(), offset);
|
||||
values.add(val);
|
||||
offset++;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unpackMap(List<ColumnValue> keys, List<ColumnValue> values) {
|
||||
|
||||
skippedIfNull();
|
||||
MapVector mapCol = (MapVector) column;
|
||||
int elemSize = mapCol.getObject(idx).size();
|
||||
FieldVector keyList = mapCol.getDataVector().getChildrenFromFields().get(0);
|
||||
FieldVector valList = mapCol.getDataVector().getChildrenFromFields().get(1);
|
||||
for (int i = 0; i < elemSize; i++) {
|
||||
MaxComputeColumnValue key = new MaxComputeColumnValue(keyList, offset);
|
||||
keys.add(key);
|
||||
MaxComputeColumnValue val = new MaxComputeColumnValue(valList, offset);
|
||||
values.add(val);
|
||||
offset++;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unpackStruct(List<Integer> structFieldIndex, List<ColumnValue> values) {
|
||||
|
||||
skippedIfNull();
|
||||
StructVector structCol = (StructVector) column;
|
||||
for (Integer fieldIndex : structFieldIndex) {
|
||||
MaxComputeColumnValue val = new MaxComputeColumnValue(structCol.getChildByOrdinal(fieldIndex), idx);
|
||||
values.add(val);
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !mc_q1 --
|
||||
3 [1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
|
||||
2 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
|
||||
1 [1.2, 1.3] [1, 2, 3] ["2023-05-23 05:55:12.000"] ["a", "b", "c"]
|
||||
|
||||
-- !mc_q2 --
|
||||
{1:"example1", 2:"example2"} {1:2.5, 2:3.75}
|
||||
{349:"asd", 324:"uid"} {3:2.5, 99:3.75}
|
||||
|
||||
-- !mc_q3 --
|
||||
{"phone_number":123450, "email":"user1@example.com", "addr":"Addr1"} {"id":"user1", "age":25}
|
||||
{"phone_number":2345671, "email":"user2@example.com", "addr":"Addr2"} {"id":"user2", "age":30}
|
||||
|
||||
-- !mc_q4 --
|
||||
user1 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, {"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}]
|
||||
user2 [{"activity_date":"2024-08-01", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}, {"activity_date":"2024-08-02", "activities":{"cooking":{"details":"Made vegan meal", "metrics":{"time_spent":1.5, "calories":500}}, "movie":{"details":"Watched action movie", "metrics":{"time_spent":1.5, "calories":500}}}}]
|
||||
@ -0,0 +1,175 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
/*
|
||||
// Test Case DDL
|
||||
create table array_table (
|
||||
id int,
|
||||
arr1 ARRAY<BIGINT>,
|
||||
arr2 ARRAY<VARCHAR(10)>,
|
||||
arr3 ARRAY<DOUBLE>,
|
||||
arr4 ARRAY<DATE>,
|
||||
arr5 ARRAY<DATETIME>
|
||||
);
|
||||
INSERT INTO array_table VALUES(1, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
|
||||
INSERT INTO array_table VALUES(2, array(1, 2, 3), array('a', 'b', 'c'), array(1.2, 1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
|
||||
INSERT INTO array_table VALUES(3, array(1, 2, 3), array('a', 'b', 'c'), array(1.3), array(date('2023-05-23')), array(datetime('2023-05-23 13:55:12')));
|
||||
|
||||
create table map_table (
|
||||
arr1 MAP<BIGINT, DOUBLE>,
|
||||
arr2 MAP<BIGINT, STRING>
|
||||
);
|
||||
INSERT INTO map_table (arr1, arr2)
|
||||
VALUES (
|
||||
MAP(1, 2.5, 2, 3.75),
|
||||
MAP(1, 'example1', 2, 'example2')
|
||||
);
|
||||
INSERT INTO map_table (arr1, arr2)
|
||||
VALUES (
|
||||
MAP(3, 2.5, 99, 3.75),
|
||||
MAP(349, 'asd', 324, 'uid')
|
||||
);
|
||||
|
||||
create table struct_table (
|
||||
user_info STRUCT<id: STRING,age: INT>,
|
||||
contact_info STRUCT<phone_number: BIGINT, email: STRING, addr: VARCHAR(10)>
|
||||
);
|
||||
|
||||
INSERT INTO struct_table VALUES
|
||||
(
|
||||
named_struct('id', 'user1', 'age', 25),
|
||||
named_struct('phone_number', 123450, 'email', 'user1@example.com', 'addr', 'Addr1')
|
||||
),
|
||||
(
|
||||
named_struct('id', 'user2', 'age', 30),
|
||||
named_struct('phone_number', 2345671, 'email', 'user2@example.com', 'addr', 'Addr2')
|
||||
),
|
||||
(
|
||||
named_struct('id', 'user3', 'age', 35),
|
||||
named_struct('phone_number', 3456789, 'email', 'user3@example.com', 'addr', 'Addr3')
|
||||
);
|
||||
|
||||
CREATE TABLE nested_complex_table (
|
||||
user_id STRING,
|
||||
user_profile STRUCT<
|
||||
name: STRING,
|
||||
age: INT,
|
||||
preferences: MAP<
|
||||
STRING,
|
||||
STRUCT<
|
||||
preference_id: INT,
|
||||
preference_values: ARRAY<STRING>
|
||||
>
|
||||
>
|
||||
>,
|
||||
activity_log ARRAY<
|
||||
STRUCT<
|
||||
activity_date: STRING,
|
||||
activities: MAP<
|
||||
STRING,
|
||||
STRUCT<
|
||||
details: STRING,
|
||||
metrics: MAP<STRING, DOUBLE>
|
||||
>
|
||||
>
|
||||
>
|
||||
>
|
||||
);
|
||||
INSERT INTO nested_complex_table VALUES
|
||||
(
|
||||
'user1',
|
||||
named_struct('name', 'Alice', 'age', 28, 'preferences', map(
|
||||
'sports', named_struct('preference_id', 101, 'preference_values', array('soccer', 'tennis')),
|
||||
'music', named_struct('preference_id', 102, 'preference_values', array('rock', 'classical'))
|
||||
)),
|
||||
array(
|
||||
named_struct('activity_date', '2024-08-01', 'activities', map(
|
||||
'workout', named_struct('details', 'Morning run', 'metrics', map('duration', 30.5, 'calories', 200.0)),
|
||||
'reading', named_struct('details', 'Read book on Hive', 'metrics', map('pages', 50.0, 'time', 2.0))
|
||||
)),
|
||||
named_struct('activity_date', '2024-08-02', 'activities', map(
|
||||
'travel', named_struct('details', 'Flight to NY', 'metrics', map('distance', 500.0, 'time', 3.0)),
|
||||
'meeting', named_struct('details', 'Project meeting', 'metrics', map('duration', 1.5, 'participants', 5.0))
|
||||
))
|
||||
)
|
||||
),
|
||||
(
|
||||
'user2',
|
||||
named_struct('name', 'Bob', 'age', 32, 'preferences', map(
|
||||
'books', named_struct('preference_id', 201, 'preference_values', array('fiction', 'non-fiction')),
|
||||
'travel', named_struct('preference_id', 202, 'preference_values', array('beaches', 'mountains'))
|
||||
)),
|
||||
array(
|
||||
named_struct('activity_date', '2024-08-01', 'activities', map(
|
||||
'hiking', named_struct('details', 'Mountain trail', 'metrics', map('distance', 10.0, 'elevation', 500.0)),
|
||||
'photography', named_struct('details', 'Wildlife photoshoot', 'metrics', map('photos_taken', 100.0, 'time', 4.0))
|
||||
)),
|
||||
named_struct('activity_date', '2024-08-02', 'activities', map(
|
||||
'workshop', named_struct('details', 'Photography workshop', 'metrics', map('duration', 3.0, 'participants', 15.0)),
|
||||
'shopping', named_struct('details', 'Bought camera gear', 'metrics', map('items', 5.0, 'cost', 1500.0))
|
||||
))
|
||||
)
|
||||
),
|
||||
(
|
||||
'user3',
|
||||
named_struct('name', 'Carol', 'age', 24, 'preferences', map(
|
||||
'food', named_struct('preference_id', 301, 'preference_values', array('vegan', 'desserts')),
|
||||
'movies', named_struct('preference_id', 302, 'preference_values', array('action', 'comedy'))
|
||||
)),
|
||||
array(
|
||||
named_struct('activity_date', '2024-08-01', 'activities', map(
|
||||
'cooking', named_struct('details', 'Made vegan meal', 'metrics', map('time_spent', 1.5, 'calories', 500.0)),
|
||||
'movie', named_struct('details', 'Watched action movie', 'metrics', map('duration', 2.0, 'rating', 8.5))
|
||||
)),
|
||||
named_struct('activity_date', '2024-08-02', 'activities', map(
|
||||
'gym', named_struct('details', 'Strength training', 'metrics', map('duration', 1.0, 'calories', 300.0)),
|
||||
'shopping', named_struct('details', 'Bought groceries', 'metrics', map('items', 10.0, 'cost', 100.0))
|
||||
))
|
||||
)
|
||||
);
|
||||
*/
|
||||
suite("test_max_compute_complex_type", "p0,external,doris,external_docker,external_docker_doris") {
    // Only runs when the MaxCompute external test is enabled in the config.
    String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
    if (enabled != null && enabled.equalsIgnoreCase("true")) {
        // Aliyun credentials supplied via regression config.
        String ak = context.config.otherConfigs.get("aliYunAk")
        String sk = context.config.otherConfigs.get("aliYunSk")
        String mc_catalog_name = "test_max_compute_complex_type"

        // Recreate the catalog from scratch so stale settings never leak in.
        sql """drop catalog if exists ${mc_catalog_name} """
        sql """
            CREATE CATALOG IF NOT EXISTS ${mc_catalog_name} PROPERTIES (
                "type" = "max_compute",
                "mc.default.project" = "mc_datalake",
                "mc.region" = "cn-beijing",
                "mc.access_key" = "${ak}",
                "mc.secret_key" = "${sk}",
                "mc.public_access" = "true"
            );
        """
        logger.info("catalog " + mc_catalog_name + " created")

        sql """switch ${mc_catalog_name};"""
        logger.info("switched to catalog " + mc_catalog_name)
        sql """ use mc_datalake """

        // Complex-type reads: arrays, maps, structs, and deep nesting.
        // NOTE(review): q2/q3 use `order by id`, but the DDL in this file's
        // header comment declares no `id` column on map_table/struct_table
        // — confirm against the actual MaxCompute table definitions.
        qt_mc_q1 """ select id,arr3,arr1,arr5,arr2 from array_table order by id desc """
        qt_mc_q2 """ select arr2,arr1 from map_table order by id limit 2 """
        qt_mc_q3 """ select contact_info,user_info from struct_table order by id limit 2 """
        qt_mc_q4 """ select user_id,activity_log from nested_complex_table order by user_id limit 2 """

        sql """drop catalog ${mc_catalog_name};"""
    }
}
|
||||
Reference in New Issue
Block a user