392 lines
16 KiB
Python
392 lines
16 KiB
Python
#!/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
############################################################################
|
|
#
|
|
# @file test_create_dynamic_partition.py
|
|
# @date 2021/06/23 10:28:20
|
|
# @brief This file is a test file for doris dynamic partition parameters.
|
|
#
|
|
#############################################################################
|
|
|
|
"""
|
|
测试动态分区导入
|
|
"""
|
|
|
|
import time
|
|
import datetime
|
|
import sys
|
|
import os
|
|
sys.path.append("../")
|
|
from dateutil.relativedelta import relativedelta
|
|
from lib import palo_config
|
|
from lib import palo_client
|
|
from lib import util
|
|
|
|
# Shared Palo/Doris client handle; initialized in setup_module().
client = None
config = palo_config.config
LOG = palo_client.LOG
L = palo_client.L
broker_info = palo_config.broker_info
# Directory containing this test file; used to locate the stream-load data file.
file_path = os.path.split(os.path.realpath(__file__))[0]

# 40 time offsets (interpreted as days or hours depending on time_unit) applied
# to "now" when generating rows.  Some offsets deliberately fall outside the
# dynamic-partition window so loads exercise the filtered-row path.
random_time = [0, 1, 3, 5, 10, 3, 5, 20, 70, 40, -5, -3, -1, -1, -2, 0, 3, 2, 0, 9, 11, 10, -9, -10,
               300, 400, -300, -400, 100, -100, -100, 365, -365, 120, 0, -50, 50, 30, 31, 101]

# 40 varchar payload values, paired 1:1 with random_time entries.
random_char = ['fnjl', 'fsd', 'afd', 'dsf', 'afl', 'ore', 'etou', 'jor', 'fljf', 'fjlk', \
               'hro', 'af', 'fef', 'poet', 'nvfk', 'aln', 'rio', 'ro', 'aaa', 'rou', \
               'orto', 'aeor', 'pjp', 'tuhi', 'mvkl', 'fnl', 'gor', 'roo', 'froh', 'roc', \
               'tqie', 'cz', 'fl', 'ajpf', 'vmjl', 'ep', 'vml', 'pjo', 'nk', 'sss']
|
|
|
|
|
|
def setup_module():
    """Module-level setup: connect to the FE and publish the client globally."""
    global client
    client = palo_client.get_client(
        config.fe_host,
        config.fe_query_port,
        user=config.fe_user,
        password=config.fe_password,
        http_port=config.fe_http_port)
|
|
|
|
|
|
def create(table_name, datetype, info, keys_desc=None, column=None):
    """Create a dynamically partitioned table and assert it exists.

    info: dict of dynamic-partition table properties.
    column: optional explicit column list; defaults to (k1 int, k2 datetype,
            k3 int, k4 varchar(16)).
    """
    if column is None:
        column = [('k1', 'int'), ('k2', datetype), ('k3', 'int'), ('k4', 'varchar(16)')]
    part = palo_client.PartitionInfo('k2', [], [])
    dist = palo_client.DistributionInfo('HASH(k2)', 3)
    dynamic_clause = palo_client.DynamicPartitionInfo(info).to_string()
    client.create_table(table_name, column, part, dist,
                        dynamic_partition_info=dynamic_clause, keys_desc=keys_desc)
    assert client.show_tables(table_name), 'fail to create table'
|
|
|
|
|
|
def load_value(time_unit, table_name, database_name):
    """Build an INSERT statement and the expected query result for time_unit.

    For each of the 40 (random_time, random_char) pairs one row is generated
    whose k2 value is "now" shifted by the offset.  Rows whose offset lands
    inside the dynamic-partition window (the tests create partitions with
    start=-10, end=10 of the given unit) are also collected into the expected
    result; rows outside the window are expected to be filtered by the load.

    Returns (sql, expected) where expected is a sorted tuple of row tuples.
    NOTE: database_name is currently unused.
    """
    now = datetime.datetime.now()
    value_list = ''
    result = []
    count = 1  # k1 value: 1-based row number
    if time_unit == 'DAY':
        for i in range(40):
            # offset interpreted as days; k2 is a 'date' column
            date_value = (now + datetime.timedelta(days=random_time[i])).strftime('%Y-%m-%d')
            value_list = '%s (%s, "%s", %s, "%s"),' % (value_list, count, date_value, random_time[i], random_char[i])
            if -10 <= random_time[i] <= 10:
                # inside the [-10, +10] day partition window -> row is kept
                year, month, day = time.strptime(date_value, '%Y-%m-%d')[:3]
                result = [(count, datetime.date(year, month, day), random_time[i], random_char[i])] + result
            count += 1
    if time_unit == 'HOUR':
        for i in range(40):
            # offset interpreted as hours; k2 is a 'datetime' truncated to the hour
            date_value = (now + datetime.timedelta(hours=random_time[i])).strftime('%Y-%m-%d %H:00:00')
            value_list = '%s (%s, "%s", %s, "%s"),' % (value_list, count, date_value, random_time[i], random_char[i])
            if -10 <= random_time[i] <= 10:
                year, month, day, hour = time.strptime(date_value, '%Y-%m-%d %H:%M:%S')[:4]
                result = [(count, datetime.datetime(year, month, day, hour), random_time[i], random_char[i])] + result
            count += 1
    if time_unit == 'WEEK':
        for i in range(40):
            date_value = (now + datetime.timedelta(days=random_time[i])).strftime('%Y-%m-%d')
            value_list = '%s (%s, "%s", %s, "%s"),' % (value_list, count, date_value, random_time[i], random_char[i])
            today = datetime.datetime.now().weekday()
            # Day window widened/shifted by the current weekday — presumably
            # because WEEK partitions cover whole calendar weeks, so 10 weeks
            # back/forward is 70/77 days adjusted by today's weekday.
            # TODO(review): confirm against Doris dynamic-partition semantics.
            if -70 - today <= random_time[i] <= 77 - today:
                year, month, day = time.strptime(date_value, '%Y-%m-%d')[:3]
                result = [(count, datetime.date(year, month, day), random_time[i], random_char[i])] + result
            count += 1
    if time_unit == 'MONTH':
        for i in range(40):
            date_value = (now + datetime.timedelta(days=random_time[i])).strftime('%Y-%m-%d')
            value_list = '%s (%s, "%s", %s, "%s"),' % (value_list, count, date_value, random_time[i], random_char[i])
            # MONTH window: first day of (now - 10 months) inclusive up to the
            # first day of (now + 11 months) exclusive; string comparison works
            # because the dates share the '%Y-%m-%d' format
            start_day = (now - relativedelta(months=10)).strftime('%Y-%m-01')
            end_day = (now + relativedelta(months=11)).strftime('%Y-%m-01')
            if start_day <= date_value < end_day:
                year, month, day = time.strptime(date_value, '%Y-%m-%d')[:3]
                result = [(count, datetime.date(year, month, day), random_time[i], random_char[i])] + result
            count += 1
    # drop the trailing comma from the VALUES list
    sql = 'insert into %s values %s' % (table_name, value_list[:-1])
    return sql, tuple(sorted(result))
|
|
|
|
|
|
def check_insert(datetype, time_unit, table_name, database_name=None):
    """Load rows via INSERT, verify the table contents, then drop the table.

    eg: database_name = test_sys_dynamic_partition_load_test_day_db
        table_name = test_sys_dynamic_partition_load_test_day_tb
    """
    insert_sql, expected = load_value(time_unit, table_name, database_name)
    # allow rows that fall outside the partition window to be filtered silently
    client.set_variables('enable_insert_strict', 'false')
    assert client.execute(insert_sql) == (), "Failed to insert data"
    query = "SELECT * FROM %s.%s ORDER BY k1" % (database_name, table_name)
    util.check(client.execute(query), expected)
    client.drop_table(table_name)
|
|
|
|
|
|
def check_broker_load(datetype, time_unit, table_name, database_name=None):
    """Load rows via broker load, verify the table contents, then drop the table."""
    remote_path = palo_config.gen_remote_file_path("sys/dynamic_partition/test_dynamic_partition_load.data")
    columns = ['k1', 'k3', 'k4']
    # k2 is derived from the offset column k3 relative to "now"
    if time_unit == 'HOUR':
        set_list_value = ['k2=hours_add(now(),k3)']
    else:
        set_list_value = ['k2=date_add(now(),k3)']
    load_info = palo_client.LoadDataInfo(remote_path, table_name,
                                         column_name_list=columns,
                                         column_terminator=' ',
                                         set_list=set_list_value)
    assert client.batch_load(util.get_label(), load_info, is_wait=True, broker=broker_info,
                             max_filter_ratio=0.7, strict_mode=False)
    tmp, expected = load_value(time_unit, table_name, database_name)
    if time_unit == 'HOUR':
        # truncate k2 to the hour so it matches the expected datetime values
        query = ('select k1,str_to_date(date_format(k2, "%%Y-%%m-%%d %%H"),"%%Y-%%m-%%d %%H") '
                 'as k2,k3,k4 from %s order by k1') % table_name
    else:
        query = 'select * from %s order by k1' % table_name
    util.check(client.execute(query), expected)
    client.drop_table(table_name)
|
|
|
|
|
|
def check_stream_load(datetype, time_unit, table_name, database_name=None):
    """Load rows via stream load, verify the table contents, then drop the table."""
    data_file = "%s/data/STREAM_LOAD/test_dynamic_partition_load.data" % file_path
    # k2 is derived from the offset column c2 relative to "now"
    if time_unit == 'HOUR':
        column = ["c1,c2,c3,k1=c1,k2=hours_add(now(),c2),k3=c2,k4=c3"]
    else:
        column = ["c1,c2,c3,k1=c1,k2=date_add(now(),c2),k3=c2,k4=c3"]
    assert client.stream_load(table_name, data_file, max_filter_ratio=0.7,
                              column_name_list=column, column_separator=',')
    tmp, expected = load_value(time_unit, table_name, database_name)
    if time_unit == 'HOUR':
        # truncate k2 to the hour so it matches the expected datetime values
        query = ('select k1,str_to_date(date_format(k2, "%%Y-%%m-%%d %%H"),"%%Y-%%m-%%d %%H") '
                 'as k2,k3,k4 from %s order by k1') % table_name
    else:
        query = 'select * from %s order by k1' % table_name
    util.check(client.execute(query), expected)
    client.drop_table(table_name)
|
|
|
|
|
|
def test_day():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_day",
    "describe": "导入数据到动态分区,time_unit=DAY",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'DAY', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'date', dynamic_partition_info)
        checker('date', 'DAY', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
def test_hour():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_hour",
    "describe": "导入数据到动态分区,time_unit=HOUR",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'HOUR', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    # avoid starting right before an hour boundary: expected values are built
    # from "now" and would otherwise race with partition rollover
    if time.strftime("%M", time.localtime()) == '59':
        time.sleep(90)
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'datetime', dynamic_partition_info)
        checker('datetime', 'HOUR', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
def test_week():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_week",
    "describe": "导入数据到动态分区,time_unit=WEEK",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'WEEK', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'date', dynamic_partition_info)
        checker('date', 'WEEK', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
def test_month():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_month",
    "describe": "导入数据到动态分区,time_unit=MONTH",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'MONTH', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'date', dynamic_partition_info)
        checker('date', 'MONTH', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
def test_aggregate_load():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_aggregate__load",
    "describe": "AGGREGATE聚合表导入数据",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'DAY', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    # k4 uses REPLACE aggregation; keys are (k1, k2, k3)
    column = [('k1', 'int'), ('k2', 'date'), ('k3', 'int'), ('k4', 'varchar(16)', 'replace')]
    keys_desc = 'AGGREGATE KEY(k1, k2, k3)'
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'date', dynamic_partition_info, keys_desc, column)
        checker('date', 'DAY', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
def test_unique_load():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_unique_load",
    "describe": "UNIQUE聚合表导入数据",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'DAY', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    keys_desc = 'UNIQUE KEY(k1, k2)'
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'date', dynamic_partition_info, keys_desc)
        checker('date', 'DAY', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
def test_duplicate_load():
    """
    {
    "title": "test_sys_dynamic_partition_load.test_duplicate_load",
    "describe": "DUPLICATE聚合表导入数据",
    "tag": "p1,function"
    }
    """
    database_name, table_name, index_name = util.gen_num_format_name_list()
    LOG.info(L('', database_name=database_name, table_name=table_name, index_name=index_name))
    client.clean(database_name)
    client.create_database(database_name)
    client.use(database_name)
    dynamic_partition_info = {'enable': 'true', 'time_unit': 'DAY', 'start': -10, 'end': 10,
                              'prefix': 'p', 'buckets': 10, 'create_history_partition': 'true'}
    keys_desc = 'DUPLICATE KEY(k1, k2)'
    # exercise insert, broker load and stream load against a fresh table each time
    for checker in (check_insert, check_broker_load, check_stream_load):
        create(table_name, 'date', dynamic_partition_info, keys_desc)
        checker('date', 'DAY', table_name, database_name)
    client.clean(database_name)
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual invocation: run only the DAY scenario.
    setup_module()
    test_day()
|
|
|
|
|