466 lines
15 KiB
Python
466 lines
15 KiB
Python
#!/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
################################################################################
|
|
#
|
|
# @file: test_sys_materialized_view_shoot.py
|
|
# @date: 2020-09-03 11:12:43
|
|
# @brief: 验证duplicate表的物化视图的命中
|
|
################################################################################
|
|
|
|
"""
|
|
新增hll_union, count, bitmap_union聚合的物化视图
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
sys.path.append("../")
|
|
from data import schema as DATA
|
|
from data import load_file as FILE
|
|
from lib import palo_client
|
|
from lib import palo_config
|
|
from lib import util
|
|
|
|
config = palo_config.config
|
|
LOG = palo_client.LOG
|
|
L = palo_client.L
|
|
|
|
broker_info = palo_config.broker_info
|
|
|
|
|
|
def get_explain_table(client, sql):
|
|
"""
|
|
Get explain table
|
|
"""
|
|
result = client.execute('EXPLAIN ' + sql)
|
|
if result is None:
|
|
return None
|
|
rollup_flag = 'rollup: '
|
|
explain_table = list()
|
|
for element in result:
|
|
message = element[0].lstrip()
|
|
if message.startswith(rollup_flag):
|
|
explain_table.append(message[len(rollup_flag):].rstrip(' '))
|
|
return explain_table
|
|
|
|
|
|
def check2(client, sql1, sql2):
|
|
"""check 2 sql same result"""
|
|
ret1 = client.execute(sql1)
|
|
ret2 = client.execute(sql2)
|
|
util.check(ret1, ret2)
|
|
|
|
|
|
def setup_module():
|
|
"""
|
|
Set up
|
|
"""
|
|
global query_db, database_name, tb_dup, tb_dup
|
|
global rollup_name1, rollup_name2, rollup_name3, rollup_name4, rollup_name5
|
|
global mv_name1, mv_name2, mv_name3, mv_name4, mv_name5, mv_name6, mv_name7, mv_name8, mv_name_9, mv_name10
|
|
if 'FE_DB' in os.environ.keys():
|
|
query_db = os.environ["FE_DB"]
|
|
else:
|
|
query_db = "test_query_qa"
|
|
database_name = 'test_sys_materialized_view_shoot_test_shoot_db'
|
|
tb_dup = 'test_shoot_tb_dup'
|
|
mv_name1 = 'mv1'
|
|
mv_name2 = 'mv2'
|
|
mv_name3 = 'mv3'
|
|
mv_name4 = 'mv4'
|
|
mv_name5 = 'mv5'
|
|
mv_name6 = 'mv6'
|
|
mv_name7 = 'mv7'
|
|
mv_name8 = 'mv8'
|
|
mv_name9 = 'mv9'
|
|
mv_name10 = 'mv10'
|
|
init_mv()
|
|
|
|
|
|
def init_mv():
|
|
"""init db, table and rollup"""
|
|
client = palo_client.get_client(config.fe_host, config.fe_query_port)
|
|
client.clean(database_name)
|
|
client.create_database(database_name)
|
|
client.use(database_name)
|
|
ret = client.create_table(tb_dup, DATA.baseall_column_no_agg_list,
|
|
distribution_info=palo_client.DistributionInfo("HASH(k2)", 5), set_null=True)
|
|
|
|
sql = 'select k1, k2, k3, k4, max(k8), sum(k9) from %s group by k1, k2, k3, k4' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name1, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name1)
|
|
|
|
sql = 'select k1, k3, max(k8), sum(k9) from %s group by k1, k3' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name2, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name2)
|
|
|
|
sql = 'select k1, k2, k4, max(k8), sum(k9) from %s group by k1, k2, k4' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name3, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name3)
|
|
|
|
sql = 'select k2, k1, max(k8), sum(k9) from %s group by k2, k1' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name4, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name4)
|
|
|
|
sql = 'select k2, k1, k3, k4, max(k8), sum(k9) from %s group by k2, k1, k3, k4' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name5, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name5)
|
|
|
|
sql = 'select k2, k1, k3, k4, count(k6), count(k7) from %s group by k2, k1, k3, k4' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name6, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name6)
|
|
|
|
sql = 'select k2, k1, k3, k4, hll_union(hll_hash(k10)), hll_union(hll_hash(k11)) from %s ' \
|
|
'group by k2, k1, k3, k4' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name7, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name7)
|
|
|
|
sql = 'select k2, k3, k4, bitmap_union(to_bitmap(k1)) from %s group by k2, k3, k4' % tb_dup
|
|
client.create_materialized_view(tb_dup, mv_name8, sql, is_wait=True)
|
|
assert client.show_tables(tb_dup)
|
|
assert client.get_index(tb_dup, index_name=mv_name8)
|
|
|
|
column_name_list = ['k1', 'k2', 'k3', 'k4', 'k5', 'k6', 'k10', 'k11', 'k7', 'k8', 'k9']
|
|
set_list = ['']
|
|
data_desc_list = palo_client.LoadDataInfo(FILE.baseall_hdfs_file, tb_dup)
|
|
ret = client.batch_load(util.get_label(), data_desc_list, is_wait=True, broker=broker_info)
|
|
assert ret
|
|
|
|
sql1 = 'select count(*) from %s.%s' % (database_name, tb_dup)
|
|
sql2 = 'select count(*) from %s.baseall' % query_db
|
|
check2(client, sql1, sql2)
|
|
|
|
|
|
def test_shoot_1():
|
|
"""
|
|
{
|
|
"title": "test_shoot_1",
|
|
"describe": "without where, sum命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
client.set_variables('test_materialized_view', 1)
|
|
assert client.show_variable('test_materialized_view')[0][1] == 'true'
|
|
|
|
sql = 'select sum(k9) from %s.%s' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name2 in shoot_table or mv_name4 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_2():
|
|
"""
|
|
{
|
|
"title": "test_shoot_2",
|
|
"describe": "where k1 = 1,点查询命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k1 = 1' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name2 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k1 = 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_3():
|
|
"""
|
|
{
|
|
"title": "test_shoot_3",
|
|
"describe": "where k1 > 1,范围查询命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k1 > 1' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name2 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k1 > 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_4():
|
|
"""
|
|
{
|
|
"title": "test_shoot_4",
|
|
"describe": "where k1=1 and k2=1,and 命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k1 = 1 and k2 = 1' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name4 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k1 = 1 and k2 = 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_5():
|
|
"""
|
|
{
|
|
"title": "test_shoot_5",
|
|
"describe": "where k1>1 and k2=1,and命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k1 > 1 and k2 = 1' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name4 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k1 > 1 and k2 = 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_6():
|
|
"""
|
|
{
|
|
"title": "test_shoot_6",
|
|
"describe": "where k4>1,命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k4 > 1' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name3 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k4 > 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_7():
|
|
"""
|
|
{
|
|
"title": "test_shoot_7",
|
|
"describe": "where k1=1 and k2>1,命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k1 = 1 and k2 > 1' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name3 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k1 = 1 and k2 > 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_8():
|
|
"""
|
|
{
|
|
"title": "test_shoot_8",
|
|
"describe": "where k1=1 and k2=1 and k3=1命中中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where k1 = 1 and k2 = 1 and k3 > 1' \
|
|
% (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name1 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where k1 = 1 and k2 = 1 and k3 > 1' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_9():
|
|
"""
|
|
{
|
|
"title": "test_shoot_9",
|
|
"describe": " where cast,命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(k9) from %s.%s where cast(k2 as int) < 10000' % (database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name4 in shoot_table
|
|
check_sql = 'select sum(k9) from %s.baseall where cast(k2 as int) < 10000' % query_db
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_10():
|
|
"""
|
|
{
|
|
"title": "test_shoot_10",
|
|
"describe": "where join,命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
# 左表
|
|
|
|
sql = 'select sum(a.k9) from %s.%s a join %s.%s b on a.k1 = b.k1' \
|
|
% (database_name, tb_dup, database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name2 in shoot_table
|
|
check_sql = 'select sum(a.k9) from %s.baseall a join %s.baseall b on a.k1 = b.k1' \
|
|
% (query_db, query_db)
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_11():
|
|
"""
|
|
{
|
|
"title": "test_shoot_11",
|
|
"describe": "where join on,命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(a.k9) from %s.%s a join %s.%s b on a.k1 = b.k1 and a.k2 = 2' \
|
|
% (database_name, tb_dup, database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name4 in shoot_table
|
|
check_sql = 'select sum(a.k9) from %s.baseall a join %s.baseall b on a.k1 = b.k1 ' \
|
|
'and a.k2 = 2' % (query_db, query_db)
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_12():
|
|
"""
|
|
{
|
|
"title": "test_shoot_12",
|
|
"describe": "where join on cas,命中rollup",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql = 'select sum(a.k9) from %s.%s a join %s.%s b on a.k1 = cast(hex(b.k1) as int) ' \
|
|
'and a.k2 = 2' % (database_name, tb_dup, database_name, tb_dup)
|
|
shoot_table = get_explain_table(client, sql)
|
|
LOG.info(L('shoot table:', shoot_table=shoot_table))
|
|
assert mv_name4 in shoot_table
|
|
check_sql = 'select sum(a.k9) from %s.baseall a join %s.baseall b on ' \
|
|
'a.k1 = cast(hex(b.k1) as int) and a.k2 = 2' % (query_db, query_db)
|
|
check2(client, sql, check_sql)
|
|
|
|
|
|
def test_shoot_13():
|
|
"""
|
|
{
|
|
"title": "test_shoot_13",
|
|
"describe": "count, hll_union, bitmap_union聚合查询测试",
|
|
"tag": "system,p1"
|
|
}
|
|
"""
|
|
client = palo_client.PaloClient(config.fe_host, config.fe_query_port)
|
|
client.init()
|
|
client.use(database_name)
|
|
|
|
sql1 = 'select bitmap_union_count(to_bitmap(%s)) from %s where 1=1'
|
|
sql2 = 'select bitmap_union(to_bitmap(%s)) from %s where 1=1'
|
|
sql3 = 'select count(distinct %s) from %s where 1=1'
|
|
sql4 = 'select count(%s) from %s where 1=1'
|
|
sql5 = 'select hll_cardinality(hll_raw_agg(hll_hash(%s))) from %s where 1=1'
|
|
sql6 = 'select hll_union_agg(hll_hash(%s)) from %s where 1=1'
|
|
sql7 = 'select ndv(%s) from %s where 1=1'
|
|
sql8 = 'select approx_count_distinct(%s) from %s where 1=1'
|
|
check(client, sql1, 'k1', mv_name8)
|
|
check(client, sql2, 'k1', mv_name8)
|
|
check(client, sql3, 'k1', mv_name8)
|
|
check(client, sql4, 'k1', tb_dup)
|
|
check(client, sql5, 'k1', tb_dup)
|
|
check(client, sql6, 'k1', tb_dup)
|
|
check(client, sql7, 'k1', tb_dup)
|
|
check(client, sql8, 'k1', tb_dup)
|
|
check(client, sql3, 'k6', tb_dup)
|
|
check(client, sql4, 'k6', mv_name6)
|
|
check(client, sql5, 'k6', tb_dup)
|
|
check(client, sql6, 'k6', tb_dup)
|
|
check(client, sql7, 'k6', tb_dup)
|
|
check(client, sql8, 'k6', tb_dup)
|
|
check(client, sql3, 'k11', tb_dup)
|
|
check(client, sql4, 'k11', tb_dup)
|
|
check(client, sql5, 'k11', mv_name7)
|
|
check(client, sql6, 'k11', mv_name7)
|
|
check(client, sql7, 'k11', mv_name7)
|
|
check(client, sql8, 'k11', mv_name7)
|
|
|
|
|
|
def check(client, sql, col, mv):
|
|
"""check explain rollup and result"""
|
|
sql1 = sql % (col, tb_dup)
|
|
sql2 = sql % (col, 'test_query_qa.baseall')
|
|
shoot_mv = get_explain_table(client, sql1)
|
|
print(mv, shoot_mv)
|
|
check2(client, sql1, sql2)
|
|
assert mv in shoot_mv
|
|
|
|
|
|
def teardown_module():
|
|
"""tear down"""
|
|
pass
|
|
|
|
|
|
|