#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
############################################################################
#
# @file test_array_ddl.py
# @date 2022-08-15 11:09:53
# @brief This file is a test file for array type.
#############################################################################
"""
test_array_ddl.py

DDL-level tests for the ARRAY column type: table creation rules (key/model/
bucket/default restrictions), CREATE TABLE LIKE, CTAS, nesting and external
tables, plus basic load/query verification.

NOTE(review): this file appears to have had '<...>' generic parameters
stripped by a text-processing step (e.g. "ARRAY<INT>" -> "ARRAY",
"ARRAY<ARRAY<INT>>" -> "ARRAY>").  They are restored below where the test
data makes the intended inner type evident; spots where the inner type
cannot be recovered are marked with a TODO -- confirm against the original.
"""
import sys
import os
import time

import pytest

file_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path.append(file_dir)
file_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(file_dir)

from lib import palo_config
from lib import palo_client
from lib import util
from lib import common
from lib import palo_job
from data import schema as SCHEMA
from data import load_file as FILE

config = palo_config.config
broker_info = palo_config.broker_info


def setup_module():
    """Module setup: skip unless vectorized engine is on; enable ARRAY type."""
    client = common.get_client()
    ret = client.show_variables('enable_vectorized_engine')
    if len(ret) == 1 and ret[0][1] == 'false':
        # pytest.skip() raises Skipped itself; no 'raise' needed.
        pytest.skip('skip if enable_vectorized_engine is false')
    ret = client.admin_show_config('enable_array_type')
    assert len(ret) == 1, 'get enable_array_type config error'
    value = palo_job.AdminShowConfig(ret[0]).get_value()
    if value != 'true':
        client.set_frontend_config('enable_array_type', 'true')


def teardown_module():
    """Module teardown: nothing to clean up."""
    pass


def test_array_basic():
    """
    {
    "title": "test_array_basic",
    "describe": "ARRAY type basic usage: create table, stream load and query all succeed",
    "tag": "function,p0"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    ret = client.create_table(table_name, SCHEMA.array_table_list,
                              keys_desc=SCHEMA.duplicate_key)
    assert ret, 'create table failed'
    ret = client.stream_load(table_name, FILE.test_array_table_local_file,
                             column_separator='|')
    assert ret, 'stream load failed'
    assert client.verify(FILE.expe_array_table_file, table_name), 'check data failed'
    client.clean(database_name)


def test_array_key():
    """
    {
    "title": "",
    "describe": "ARRAY type is not supported as a key column",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    # ARRAY as the leading (hence key) column is rejected.
    column = [("k2", "ARRAY<INT>"), ("k1", "INT")]
    msg = 'The olap table first column could not be'
    # with duplicate key
    util.assert_return(False, msg, client.create_table, table_name, column)
    column = [("k1", "INT"), ("k2", "ARRAY<INT>")]
    msg = 'Array can only be used in the non-key column of the duplicate table at present.'
    # with explicit duplicate key including the array column
    util.assert_return(False, msg, client.create_table, table_name, column,
                       keys_desc='DUPLICATE KEY(k1,k2)')
    # default duplicate key: succeeds, and the array column must not be a key
    util.assert_return(True, '', client.create_table, table_name, column)
    ret = client.desc_table(table_name)
    key = util.get_attr_condition_value(ret, palo_job.DescInfo.Field, 'k2',
                                        palo_job.DescInfo.Key)
    assert key == 'false', 'array column can not be key'
    client.clean(database_name)


def test_array_table_model():
    """
    {
    "title": "",
    "describe": "ARRAY type only supports the duplicate model; unique and aggregate tables, and every aggregation type, are rejected",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    column = [("k1", "INT"), ("k2", "ARRAY<INT>")]
    # unique table (implicit REPLACE aggregation on value columns)
    msg = "ARRAY column can't support aggregation REPLACE"
    util.assert_return(False, msg, client.create_table, table_name, column,
                       keys_desc='UNIQUE KEY(k1)')
    # duplicate table
    util.assert_return(True, '', client.create_table, table_name, column,
                       keys_desc='DUPLICATE KEY(k1)')
    # aggregate table: no aggregation function is compatible with ARRAY
    agg_types = ['MAX', 'MIN', 'SUM', 'BITMAP_UNION', 'HLL_UNION']
    msg = 'is not compatible with primitive type array'
    for agg in agg_types:
        column = [("k1", "INT"), ("k2", "ARRAY<INT> %s" % agg)]
        util.assert_return(False, msg, client.create_table, table_name, column)
    agg_types = ['REPLACE', 'REPLACE_IF_NOT_NULL']
    msg = "ARRAY column can't support aggregation"
    for agg in agg_types:
        column = [("k1", "INT"), ("k2", "ARRAY<INT> %s" % agg)]
        util.assert_return(False, msg, client.create_table, table_name, column)
    client.clean(database_name)


def test_array_tb_partition():
    """
    {
    "title": "",
    "describe": "ARRAY type on a partitioned table: load and query succeed",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    ret = client.create_table(table_name, SCHEMA.array_table_list,
                              partition_info=SCHEMA.baseall_tinyint_partition_info,
                              keys_desc=SCHEMA.duplicate_key)
    assert ret, 'create table failed'
    ret = client.stream_load(table_name, FILE.test_array_table_local_file,
                             column_separator='|')
    assert ret, 'stream load failed'
    assert client.verify(FILE.expe_array_table_file, table_name), 'check data failed'
    client.clean(database_name)


def test_array_default():
    """
    {
    "title": "",
    "describe": "ARRAY type default values: only a null default on a nullable column is supported",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    # array with a non-null default is rejected
    column = [("k1", "INT", "", "88"), ("k2", "ARRAY<INT>", "", "[1,2,3]")]
    msg = 'Array type column default value only support null'
    util.assert_return(False, msg, client.create_table, table_name, column,
                       keys_desc="DUPLICATE KEY(k1)", set_null=True)
    # NOT NULL array with a null default is rejected
    column = [("k1", "INT", "", "99"), ("k2", "ARRAY<INT>", "", None)]
    msg = 'Can not set null default value to non nullable column: k2'
    util.assert_return(False, msg, client.create_table, table_name, column,
                       keys_desc="DUPLICATE KEY(k1)", set_null=False)
    # nullable array with a null default works; omitted values come back NULL
    ret = client.create_table(table_name, column,
                              keys_desc="DUPLICATE KEY(k1)", set_null=True)
    assert ret, 'create table failed'
    sql = 'insert into %s values(1, [1,2,3])' % table_name
    client.execute(sql)
    sql = 'insert into %s(k1) values(2), (3), (4)' % table_name
    client.execute(sql)
    ret1 = client.select_all(table_name)
    ret2 = ((1, '[1, 2, 3]'), (2, None), (3, None), (4, None))
    util.check(ret1, ret2, True)
    client.clean(database_name)


def test_array_create_like():
    """
    {
    "title": "",
    "describe": "CREATE TABLE LIKE on a table containing an ARRAY column creates an empty clone; load and query succeed",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    ret = client.create_table(table_name, SCHEMA.array_table_list,
                              partition_info=SCHEMA.baseall_tinyint_partition_info,
                              keys_desc=SCHEMA.duplicate_key)
    assert ret, 'create table failed'
    ret = client.stream_load(table_name, FILE.test_array_table_local_file,
                             column_separator='|')
    assert ret, 'stream load failed'
    new_tb = 'new_%s' % table_name
    ret = client.create_table_like(new_tb, table_name)
    assert ret, 'create table failed'
    # LIKE copies only the schema, not the data
    ret = client.select_all(new_tb)
    assert ret == (), 'expect empty table'
    ret1 = client.desc_table(table_name)
    ret2 = client.desc_table(new_tb)
    util.check(ret1, ret2)
    ret = client.stream_load(new_tb, FILE.test_array_table_local_file,
                             column_separator='|')
    assert ret, 'stream load failed'
    # give the load a moment to become visible before verifying
    time.sleep(10)
    assert client.verify(FILE.expe_array_table_file, new_tb), 'check data failed'
    client.clean(database_name)


def test_array_ctas():
    """
    {
    "title": "",
    "describe": "CTAS from a query producing ARRAY columns: table is created and data is loaded",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    sql1 = 'select 1, 2, 3, array_sort(collect_list(k6)), array_sort(collect_list(k7)), ' \
           'array_sort(collect_list(k5)) from test_query_qa.test'
    sql = 'create table %s as select 1 k1, 2 k2, 3 k3, collect_list(k6) a1, collect_list(k7) a2, ' \
          'collect_list(k5) a3 from test_query_qa.test' % table_name
    client.execute(sql)
    sql2 = 'select k1, k2, k3, array_sort(a1), array_sort(a2), array_sort(a3) from %s.%s' \
           % (database_name, table_name)
    common.check2(client, sql1, sql2=sql2, forced=True)
    ret = client.desc_table(table_name)
    # TODO(review): the expected type strings below lost their '<...>'
    # parameters (e.g. 'ARRAY<CHAR(...)>'); the exact inner types depend on
    # the test_query_qa.test schema -- restore from the original file.
    assert "ARRAY" == util.get_attr_condition_value(
        ret, palo_job.DescInfo.Field, 'a1', palo_job.DescInfo.Type), 'column type error'
    assert "ARRAY" == util.get_attr_condition_value(
        ret, palo_job.DescInfo.Field, 'a2', palo_job.DescInfo.Type), 'column type error'
    assert "ARRAY" == util.get_attr_condition_value(
        ret, palo_job.DescInfo.Field, 'a3', palo_job.DescInfo.Type), 'column type error'
    client.clean(database_name)


def test_array_bucket_column():
    """
    {
    "title": "",
    "describe": "Using an ARRAY column as the bucket (distribution) column fails to create the table",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    distribute = palo_client.DistributionInfo('hash(a2)', 1)
    msg = 'Array Type should not be used in distribution column[a2]'
    util.assert_return(False, msg, client.create_table, table_name,
                       SCHEMA.array_table_list,
                       partition_info=SCHEMA.baseall_tinyint_partition_info,
                       keys_desc=SCHEMA.duplicate_key,
                       distribution_info=distribute)
    client.clean(database_name)


def test_array_nest():
    """
    {
    "title": "",
    "describe": "Nested ARRAY: verify create table, load and query only",
    "tag": "function,p2"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    column = [("k1", "INT"), ("k2", "ARRAY<ARRAY<INT>>")]
    ret = client.create_table(table_name, column, keys_desc='DUPLICATE KEY(k1)')
    assert ret, 'create table failed'
    sql = 'insert into %s values(1, [[1, 2, 3], [-1, -2, -3]])' % table_name
    client.execute(sql)
    sql = 'insert into %s values(2, [[3, 4, 5], [-1, 2, 3]])' % table_name
    client.execute(sql)
    ret1 = client.select_all(table_name)
    ret2 = ((1, '[[1, 2, 3], [-1, -2, -3]]'), (2, '[[3, 4, 5], [-1, 2, 3]]'))
    util.check(ret1, ret2, True)
    ret = client.desc_table(table_name)
    # NOTE(review): inner type restored from stripped 'ARRAY>' -- confirm.
    assert "ARRAY<ARRAY<INT>>" == \
        util.get_attr_condition_value(ret, palo_job.DescInfo.Field, 'k2',
                                      palo_job.DescInfo.Type)
    client.clean(database_name)


def test_array_external_table():
    """
    {
    "title": "",
    "describe": "Creating an external table (broker, hdfs csv) containing an ARRAY column is currently rejected",
    "tag": "function,p1"
    }
    """
    database_name, table_name, index_name = util.gen_name_list()
    client = common.create_workspace(database_name)
    # renamed from 'property' to avoid shadowing the builtin; the keyword
    # argument name required by create_external_table is still 'property'.
    broker_path_property = {"broker_name": config.broker_name,
                            "path": FILE.test_array_table_remote_file,
                            "column_separator": "|",
                            "format": "csv"}
    msg = 'Array can only be used in the non-key column of the duplicate table at present'
    util.assert_return(False, msg, client.create_external_table, table_name,
                       SCHEMA.array_table_list, engine='broker',
                       property=broker_path_property,
                       broker_property=config.broker_property)
    # TODO: enable once external ARRAY tables are supported:
    # ret = client.create_external_table(table_name, SCHEMA.array_table_list, engine='broker',
    #                                    property=broker_path_property,
    #                                    broker_property=config.broker_property)
    # assert ret, 'create array external table failed'
    # assert client.verify(FILE.expe_array_table_file, table_name), 'check data failed'
    client.clean(database_name)


if __name__ == '__main__':
    setup_module()