--
--Create table
--
--Foreign table
CREATE SERVER hdfs_server FOREIGN DATA WRAPPER HDFS_FDW OPTIONS (type 'hdfs', address '@hdfshostname@:@hdfsport@',hdfscfgpath '@hdfscfgpath@');
CREATE FOREIGN TABLE customer_reviews
(
customer_id TEXT ,
review_date DATE ,
review_rating INTEGER ,
review_votes INTEGER,
review_helpful_votes INTEGER,
product_id CHAR(10) ,
product_title TEXT ,
product_sales_rank BIGINT,
product_group TEXT,
product_category TEXT,
product_subcategory TEXT,
similar_product_ids CHAR(10)
)
SERVER hdfs_server
OPTIONS(format 'orc', foldername '/user/hive/warehouse/customer_reviews') distribute by roundrobin;
DROP FOREIGN TABLE IF EXISTS d_bigrow;
NOTICE: foreign table "d_bigrow" does not exist, skipping
CREATE FOREIGN TABLE d_bigrow(
boolean1 BOOLEAN,
short1 INT2,
integer1 INT,
long1 INT8,
list1 INT,
float1 FLOAT4,
double1 FLOAT8,
string1 TEXT,
list2 TEXT,
date1 DATE,
timestamp1 TIMESTAMP
) SERVER hdfs_server
OPTIONS(format 'orc', foldername '/user/hive/warehouse/d_bigrow') distribute by roundrobin;
--Regular table
create table t (c1 int, c2 int) distribute by roundrobin;
create table s (b1 int, b2 int, b3 int) distribute by roundrobin;
create table r (a1 int, a2 int, a3 int) distribute by roundrobin;
insert into t values (1, 2), (2, 3), (3, 4), (4, 5);
insert into s values (1, 2, 2), (2, 3, 3), (3, 4, 4), (4, 5, 5);
insert into r values (3, 2,1 ), (4,3,2), (5, 4, 3), (6, 5, 4);
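-- The explain output below prints "Statistics ... are not collected" warnings
-- because these tables have not been analyzed, and running analyze would
-- change the captured plans. A minimal commented sketch of collecting
-- statistics on the helper tables, for reference only:
-- analyze t;
-- analyze s;
-- analyze r;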
--TPCH table
CREATE TABLE customer (
c_custkey integer NOT NULL,
c_name character varying(25) NOT NULL,
c_address character varying(40) NOT NULL,
c_nationkey integer NOT NULL,
c_phone character(15) NOT NULL,
c_acctbal numeric(15,2) NOT NULL,
c_mktsegment character(10) NOT NULL,
c_comment character varying(117) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE lineitem (
l_orderkey integer NOT NULL,
l_partkey integer NOT NULL,
l_suppkey integer NOT NULL,
l_linenumber integer NOT NULL,
l_quantity numeric(15,2) NOT NULL,
l_extendedprice numeric(15,2) NOT NULL,
l_discount numeric(15,2) NOT NULL,
l_tax numeric(15,2) NOT NULL,
l_returnflag character(1) NOT NULL,
l_linestatus character(1) NOT NULL,
l_shipdate date NOT NULL,
l_commitdate date NOT NULL,
l_receiptdate date NOT NULL,
l_shipinstruct character(25) NOT NULL,
l_shipmode character(10) NOT NULL,
l_comment character varying(44) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE nation (
n_nationkey integer NOT NULL,
n_name character(25) NOT NULL,
n_regionkey integer NOT NULL,
n_comment character varying(152)
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE orders (
o_orderkey integer NOT NULL,
o_custkey integer NOT NULL,
o_orderstatus character(1) NOT NULL,
o_totalprice numeric(15,2) NOT NULL,
o_orderdate date NOT NULL,
o_orderpriority character(15) NOT NULL,
o_clerk character(15) NOT NULL,
o_shippriority integer NOT NULL,
o_comment character varying(79) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE part (
p_partkey integer NOT NULL,
p_name character varying(55) NOT NULL,
p_mfgr character(25) NOT NULL,
p_brand character(10) NOT NULL,
p_type character varying(25) NOT NULL,
p_size integer NOT NULL,
p_container character(10) NOT NULL,
p_retailprice numeric(15,2) NOT NULL,
p_comment character varying(23) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE partsupp (
ps_partkey integer NOT NULL,
ps_suppkey integer NOT NULL,
ps_availqty integer NOT NULL,
ps_supplycost numeric(15,2) NOT NULL,
ps_comment character varying(199) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE region (
r_regionkey integer NOT NULL,
r_name character(25) NOT NULL,
r_comment character varying(152)
)
DISTRIBUTE BY ROUNDROBIN ;
CREATE TABLE supplier (
s_suppkey integer NOT NULL,
s_name character(25) NOT NULL,
s_address character varying(40) NOT NULL,
s_nationkey integer NOT NULL,
s_phone character(15) NOT NULL,
s_acctbal numeric(15,2) NOT NULL,
s_comment character varying(101) NOT NULL
)
DISTRIBUTE BY ROUNDROBIN ;
\i @abs_srcdir@/data/tpch.data
COPY customer (c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment) FROM stdin;
--
--
COPY lineitem (l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment) FROM stdin;
--
--
COPY nation (n_nationkey, n_name, n_regionkey, n_comment) FROM stdin;
--
--
COPY orders (o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment) FROM stdin;
--
--
COPY part (p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment) FROM stdin;
--
--
COPY partsupp (ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment) FROM stdin;
--
--
COPY region (r_regionkey, r_name, r_comment) FROM stdin;
--
--
COPY supplier (s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment) FROM stdin;
--
--Test Begin
--
--*Disable fstream
explain (verbose on, costs off) select * from t where c1 in (select b1 from s where c2=b2) order by c1;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE true ORDER BY 1
Coordinator quals: (SubPlan 1)
SubPlan 1
-> Data Node Scan on s "_REMOTE_TABLE_QUERY_"
Output: s.b1
Remote query: SELECT b1, b2 FROM ONLY public.s WHERE true
Coordinator quals: (t.c2 = s.b2)
(12 rows)
explain (verbose on, costs off) select * from d_bigrow where short1 in (select review_rating from customer_reviews where review_votes = integer1) order by short1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Row Adapter
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
-> Vector Streaming (type: GATHER)
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
Merge Sort Key: d_bigrow.short1
-> Vector Sort
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
Sort Key: d_bigrow.short1
-> Vector Foreign Scan on public.d_bigrow
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
Filter: (SubPlan 1)
Server Type: hdfs
Orc File: /user/hive/warehouse/d_bigrow
SubPlan 1
-> Row Adapter
Output: customer_reviews.review_rating
-> Vector Result
Output: customer_reviews.review_rating
Filter: (customer_reviews.review_votes = d_bigrow.integer1)
-> Vector Materialize
Output: customer_reviews.review_rating, customer_reviews.review_votes
-> Vector Streaming(type: BROADCAST)
Output: customer_reviews.review_rating, customer_reviews.review_votes
-> Vector Foreign Scan on public.customer_reviews
Output: customer_reviews.review_rating, customer_reviews.review_votes
Server Type: hdfs
Orc File: /user/hive/warehouse/customer_reviews
(27 rows)
select * from t where c1 in (select b1 from s where c2=b2) order by c1;
c1 | c2
----+----
1 | 2
2 | 3
3 | 4
4 | 5
(4 rows)
select * from d_bigrow where short1 in (select review_rating from customer_reviews where review_votes = integer1) order by short1;
boolean1 | short1 | integer1 | long1 | list1 | float1 | double1 | string1 | list2 | date1 | timestamp1
----------+--------+----------+-------+-------+--------+---------+---------+-------+-------+--------------------------
| 10 | 20 | 3 | 4 | 5 | 6 | a | b | | Tue Oct 19 09:23:54 2004
(1 row)
--Remote sort
explain (verbose on, costs off) select * from (select all * from t where c1 < 3 order by c1);
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
--------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE c1 < 3 ORDER BY 1
(6 rows)
select * from (select all * from t where c1 < 3 order by c1);
c1 | c2
----+----
1 | 2
2 | 3
(2 rows)
--Agg
with revenue (total_revenue) as
( select sum(l_extendedprice * (1 - l_discount)) from lineitem where l_suppkey=1 )
select * from revenue;
total_revenue
---------------
21495562.0223
(1 row)
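-- The CTE above simply names the aggregate; a minimal commented sketch of the
-- same computation written directly, for reference:
-- select sum(l_extendedprice * (1 - l_discount)) as total_revenue
-- from lineitem where l_suppkey = 1;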
--*Enable fstream
explain (verbose on, costs off) select * from t where c1 in (select b1 from s where c2=b2) order by c1;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE true ORDER BY 1
Coordinator quals: (SubPlan 1)
SubPlan 1
-> Data Node Scan on s "_REMOTE_TABLE_QUERY_"
Output: s.b1
Remote query: SELECT b1, b2 FROM ONLY public.s WHERE true
Coordinator quals: (t.c2 = s.b2)
(12 rows)
select * from t where c1 in (select b1 from s where c2=b2) order by c1;
c1 | c2
----+----
1 | 2
2 | 3
3 | 4
4 | 5
(4 rows)
explain (verbose on, costs off) select * from d_bigrow where short1 in (select review_rating from customer_reviews where review_votes = integer1) order by short1;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Row Adapter
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
-> Vector Streaming (type: GATHER)
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
Merge Sort Key: d_bigrow.short1
-> Vector Sort
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
Sort Key: d_bigrow.short1
-> Vector Foreign Scan on public.d_bigrow
Output: d_bigrow.boolean1, d_bigrow.short1, d_bigrow.integer1, d_bigrow.long1, d_bigrow.list1, d_bigrow.float1, d_bigrow.double1, d_bigrow.string1, d_bigrow.list2, d_bigrow.date1, d_bigrow.timestamp1
Filter: (SubPlan 1)
Server Type: hdfs
Orc File: /user/hive/warehouse/d_bigrow
SubPlan 1
-> Row Adapter
Output: customer_reviews.review_rating
-> Vector Result
Output: customer_reviews.review_rating
Filter: (customer_reviews.review_votes = d_bigrow.integer1)
-> Vector Materialize
Output: customer_reviews.review_rating, customer_reviews.review_votes
-> Vector Streaming(type: BROADCAST)
Output: customer_reviews.review_rating, customer_reviews.review_votes
-> Vector Foreign Scan on public.customer_reviews
Output: customer_reviews.review_rating, customer_reviews.review_votes
Server Type: hdfs
Orc File: /user/hive/warehouse/customer_reviews
(27 rows)
select * from d_bigrow where short1 in (select review_rating from customer_reviews where review_votes = integer1) order by short1;
boolean1 | short1 | integer1 | long1 | list1 | float1 | double1 | string1 | list2 | date1 | timestamp1
----------+--------+----------+-------+-------+--------+---------+---------+-------+-------+--------------------------
| 10 | 20 | 3 | 4 | 5 | 6 | a | b | | Tue Oct 19 09:23:54 2004
(1 row)
explain (verbose on, costs off) select * from t where c1 in (select sum(b2) sb2 from s group by b1 having c2 = sum(b2)) order by c1;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE true ORDER BY 1
Coordinator quals: (SubPlan 1)
SubPlan 1
-> HashAggregate
Output: pg_catalog.sum((sum(s.b2))), s.b1
Group By Key: s.b1
Filter: (t.c2 = pg_catalog.sum((sum(s.b2))))
-> Data Node Scan on "__REMOTE_GROUP_QUERY__"
Output: (sum(s.b2)), s.b1
Remote query: SELECT sum(b2), b1 FROM ONLY public.s WHERE true GROUP BY 2
(15 rows)
explain (verbose on, costs off) select * from t where c1 in (select b1 from s group by b1 having sum(b2) = c2) order by c1;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
-------------------------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE true ORDER BY 1
Coordinator quals: (SubPlan 1)
SubPlan 1
-> HashAggregate
Output: s.b1
Group By Key: s.b1
Filter: (pg_catalog.sum((sum(s.b2))) = t.c2)
-> Data Node Scan on "__REMOTE_GROUP_QUERY__"
Output: s.b1, (sum(s.b2))
Remote query: SELECT b1, sum(b2) FROM ONLY public.s WHERE true GROUP BY 1
(15 rows)
select * from t where c1 in (select b1 from s group by b1 having sum(b2) = c2) order by c1;
c1 | c2
----+----
1 | 2
2 | 3
3 | 4
4 | 5
(4 rows)
explain (verbose on, costs off) select * from t where c1 in (select b1 from s, r where a2 = b2 group by b1 having sum(b3) = c2) order by c1;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
-------------------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE true ORDER BY 1
Coordinator quals: (SubPlan 1)
SubPlan 1
-> HashAggregate
Output: s.b1
Group By Key: s.b1
Filter: (sum(s.b3) = t.c2)
-> Hash Join
Output: s.b1, s.b3
Hash Cond: (s.b2 = r.a2)
-> Data Node Scan on s "_REMOTE_TABLE_QUERY_"
Output: s.b1, s.b3, s.b2
Remote query: SELECT b1, b3, b2 FROM ONLY public.s WHERE true
-> Hash
Output: r.a2
-> Data Node Scan on r "_REMOTE_TABLE_QUERY_"
Output: r.a2
Remote query: SELECT a2 FROM ONLY public.r WHERE true
(23 rows)
select * from t where c1 in (select b1 from s, r where a2 = b2 group by b1 having sum(b3) = c2) order by c1;
c1 | c2
----+----
1 | 2
2 | 3
3 | 4
4 | 5
(4 rows)
--Remote sort
explain (verbose on, costs off) select * from (select all * from t where c1 < 3 order by c1);
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
--------------------------------------------------------------------------------
Sort
Output: t.c1, t.c2
Sort Key: t.c1
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE c1 < 3 ORDER BY 1
(6 rows)
explain performance select * from (select all * from t where c1 < 3 order by c1);
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------
--? .*
Output: t.c1, t.c2
Sort Key: t.c1
--? .*
--? (CPU: ex c/r=.*, ex cyc=.*, inc cyc=.*)
--? .*
Output: t.c1, t.c2
Node/s: All datanodes
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE c1 < 3 ORDER BY 1
--? .*
--? .*
--?.*
select * from (select all * from t where c1 < 3 order by c1);
c1 | c2
----+----
1 | 2
2 | 3
(2 rows)
explain (verbose on, costs off) select * from (select all * from orders where o_orderkey < 3 order by o_orderkey);
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment
Sort Key: orders.o_orderkey
-> Data Node Scan on "__REMOTE_SORT_QUERY__"
Output: orders.o_orderkey, orders.o_custkey, orders.o_orderstatus, orders.o_totalprice, orders.o_orderdate, orders.o_orderpriority, orders.o_clerk, orders.o_shippriority, orders.o_comment
Remote query: SELECT o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment FROM ONLY public.orders WHERE o_orderkey < 3 ORDER BY 1
(6 rows)
select * from (select all * from orders where o_orderkey < 3 order by o_orderkey);
o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment
------------+-----------+---------------+--------------+--------------------------+-----------------+-----------------+----------------+----------------------------------------------------------
1 | 370 | O | 172799.49 | Tue Jan 02 00:00:00 1996 | 5-LOW | Clerk#000000951 | 0 | nstructions sleep furiously among
2 | 781 | O | 38426.09 | Sun Dec 01 00:00:00 1996 | 1-URGENT | Clerk#000000880 | 0 | foxes. pending accounts at the pending, silent asymptot
(2 rows)
--Agg
with revenue (total_revenue) as
( select sum(l_extendedprice * (1 - l_discount)) from lineitem where l_suppkey=1 )
select * from revenue;
total_revenue
---------------
21495562.0223
(1 row)
-- normal table
create table hotel1000 (
lkbh text,
xm text,
xb text,
csrq text,
zjlx text,
zjhm text,
rzsj text,
rzsj_yf float4,
rjfh text,
tfsj text,
lddm text,
ldmc text,
pcsdm text,
fjdm text,
rzsj_q8 text,
etlgxsj text,
hjszd text) distribute by roundrobin;
explain (verbose on, costs off) select
dddd.xm || ' '|| dddd.ldmc || ' rzsj:' || to_char(dddd.rzsj) as info1,
a.xm || ' '|| a.ldmc || ' rzsj:' || to_char(a.rzsj) as info2
from hotel1000 a,
(select
xm,ldmc,lddm,rzsj,zjhm
from
(select
xm,ldmc,lddm,rzsj,zjhm
from
hotel1000
where
rzsj >= '2014-01-01 01:00:00'
and
rzsj <= '2014-12-30 23:00:00'
)
where
xm like 'wang%'
and
lddm in(
select
lddm
from
hotel1000
where
ldmc like '%shangwu%'
)
) dddd
where
a.zjhm <> dddd.zjhm and
ABS(INTERVALTONUM(to_timestamp(a.rzsj,'yyyy-mm-dd hh24:mi:ss') - to_timestamp(dddd.rzsj,'yyyy-mm-dd hh24:mi:ss'))) < 1
;
WARNING: Statistics in some tables or columns(public.hotel1000.rzsj, public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.zjhm, public.hotel1000.lddm) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop
Output: ((((public.hotel1000.xm || ' '::text) || public.hotel1000.ldmc) || ' rzsj:'::text) || public.hotel1000.rzsj), ((((a.xm || ' '::text) || a.ldmc) || ' rzsj:'::text) || a.rzsj)
Join Filter: ((a.zjhm <> public.hotel1000.zjhm) AND (abs(intervaltonum((to_timestamp(a.rzsj, 'yyyy-mm-dd hh24:mi:ss'::text) - to_timestamp(public.hotel1000.rzsj, 'yyyy-mm-dd hh24:mi:ss'::text)))) < 1::numeric))
-> Nested Loop Semi Join
Output: public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.rzsj, public.hotel1000.zjhm
Join Filter: (public.hotel1000.lddm = public.hotel1000.lddm)
-> Data Node Scan on hotel1000 "_REMOTE_TABLE_QUERY_"
Output: public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.rzsj, public.hotel1000.lddm, public.hotel1000.zjhm
Remote query: SELECT xm, ldmc, rzsj, lddm, zjhm FROM ONLY public.hotel1000 WHERE rzsj >= '2014-01-01 01:00:00'::text AND rzsj <= '2014-12-30 23:00:00'::text AND xm ~~ 'wang%'::text
-> Data Node Scan on hotel1000 "_REMOTE_TABLE_QUERY_"
Output: public.hotel1000.lddm
Remote query: SELECT lddm FROM ONLY public.hotel1000 WHERE ldmc ~~ '%shangwu%'::text
-> Data Node Scan on hotel1000 "_REMOTE_TABLE_QUERY_"
Output: a.xm, a.ldmc, a.rzsj, a.zjhm
Remote query: SELECT xm, ldmc, rzsj, zjhm FROM ONLY public.hotel1000 a WHERE true
(15 rows)
explain (verbose on, costs off) select
dddd.xm || ' '|| dddd.ldmc || ' rzsj:' || to_char(dddd.rzsj) as info1,
a.xm || ' '|| a.ldmc || ' rzsj:' || to_char(a.rzsj) as info2
from hotel1000 a,
(select
xm,ldmc,lddm,rzsj,zjhm
from
(select
xm,ldmc,lddm,rzsj,zjhm
from
hotel1000
where
rzsj >= '2014-01-01 01:00:00'
and
rzsj <= '2014-12-30 23:00:00'
)
where
xm like 'wang%'
and
lddm in(
select
lddm
from
hotel1000
where
ldmc like '%shangwu%'
)
) dddd
where
a.zjhm <> dddd.zjhm and
ABS(INTERVALTONUM(to_timestamp(a.rzsj,'yyyy-mm-dd hh24:mi:ss') - to_timestamp(dddd.rzsj,'yyyy-mm-dd hh24:mi:ss'))) < 1
;
WARNING: Statistics in some tables or columns(public.hotel1000.rzsj, public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.zjhm, public.hotel1000.lddm) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop
Output: ((((public.hotel1000.xm || ' '::text) || public.hotel1000.ldmc) || ' rzsj:'::text) || public.hotel1000.rzsj), ((((a.xm || ' '::text) || a.ldmc) || ' rzsj:'::text) || a.rzsj)
Join Filter: ((a.zjhm <> public.hotel1000.zjhm) AND (abs(intervaltonum((to_timestamp(a.rzsj, 'yyyy-mm-dd hh24:mi:ss'::text) - to_timestamp(public.hotel1000.rzsj, 'yyyy-mm-dd hh24:mi:ss'::text)))) < 1::numeric))
-> Nested Loop Semi Join
Output: public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.rzsj, public.hotel1000.zjhm
Join Filter: (public.hotel1000.lddm = public.hotel1000.lddm)
-> Data Node Scan on hotel1000 "_REMOTE_TABLE_QUERY_"
Output: public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.rzsj, public.hotel1000.lddm, public.hotel1000.zjhm
Remote query: SELECT xm, ldmc, rzsj, lddm, zjhm FROM ONLY public.hotel1000 WHERE rzsj >= '2014-01-01 01:00:00'::text AND rzsj <= '2014-12-30 23:00:00'::text AND xm ~~ 'wang%'::text
-> Data Node Scan on hotel1000 "_REMOTE_TABLE_QUERY_"
Output: public.hotel1000.lddm
Remote query: SELECT lddm FROM ONLY public.hotel1000 WHERE ldmc ~~ '%shangwu%'::text
-> Data Node Scan on hotel1000 "_REMOTE_TABLE_QUERY_"
Output: a.xm, a.ldmc, a.rzsj, a.zjhm
Remote query: SELECT xm, ldmc, rzsj, zjhm FROM ONLY public.hotel1000 a WHERE true
(15 rows)
drop table hotel1000;
--create foreign table
create foreign table hotel1000 (
lkbh text,
xm text,
xb text,
csrq text,
zjlx text,
zjhm text,
rzsj text,
rzsj_yf float4,
rjfh text,
tfsj text,
lddm text,
ldmc text,
pcsdm text,
fjdm text,
rzsj_q8 text,
etlgxsj text,
hjszd text)
server hdfs_server OPTIONS(format 'orc', foldername '/user/hive/warehouse/fvt_data_query_hive.db/hotel_1000')
distribute by roundrobin;
explain (verbose on, costs off) select
dddd.xm || ' '|| dddd.ldmc || ' rzsj:' || to_char(dddd.rzsj) as info1,
a.xm || ' '|| a.ldmc || ' rzsj:' || to_char(a.rzsj) as info2
from hotel1000 a,
(select
xm,ldmc,lddm,rzsj,zjhm
from
(select
xm,ldmc,lddm,rzsj,zjhm
from
hotel1000
where
rzsj >= '2014-01-01 01:00:00'
and
rzsj <= '2014-12-30 23:00:00'
)
where
xm like 'wang%'
and
lddm in(
select
lddm
from
hotel1000
where
ldmc like '%shangwu%'
)
) dddd
where
a.zjhm <> dddd.zjhm and
ABS(INTERVALTONUM(to_timestamp(a.rzsj,'yyyy-mm-dd hh24:mi:ss') - to_timestamp(dddd.rzsj,'yyyy-mm-dd hh24:mi:ss'))) < 1
;
WARNING: Statistics in some tables or columns(public.hotel1000.rzsj, public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.zjhm, public.hotel1000.lddm) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Row Adapter
Output: (((((public.hotel1000.xm || ' '::text) || public.hotel1000.ldmc) || ' rzsj:'::text) || public.hotel1000.rzsj)), (((((a.xm || ' '::text) || a.ldmc) || ' rzsj:'::text) || a.rzsj))
-> Vector Streaming (type: GATHER)
Output: (((((public.hotel1000.xm || ' '::text) || public.hotel1000.ldmc) || ' rzsj:'::text) || public.hotel1000.rzsj)), (((((a.xm || ' '::text) || a.ldmc) || ' rzsj:'::text) || a.rzsj))
-> Vector Nest Loop Semi Join
Output: ((((public.hotel1000.xm || ' '::text) || public.hotel1000.ldmc) || ' rzsj:'::text) || public.hotel1000.rzsj), ((((a.xm || ' '::text) || a.ldmc) || ' rzsj:'::text) || a.rzsj)
Join Filter: (public.hotel1000.lddm = public.hotel1000.lddm)
-> Vector Nest Loop
Output: a.xm, a.ldmc, a.rzsj, public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.rzsj, public.hotel1000.lddm
Join Filter: ((a.zjhm <> public.hotel1000.zjhm) AND (abs(intervaltonum((to_timestamp(a.rzsj, 'yyyy-mm-dd hh24:mi:ss'::text) - to_timestamp(public.hotel1000.rzsj, 'yyyy-mm-dd hh24:mi:ss'::text)))) < 1::numeric))
-> Vector Streaming(type: BROADCAST)
Output: a.xm, a.ldmc, a.rzsj, a.zjhm
-> Vector Foreign Scan on public.hotel1000 a
Output: a.xm, a.ldmc, a.rzsj, a.zjhm
Server Type: hdfs
Orc File: /user/hive/warehouse/fvt_data_query_hive.db/hotel_1000
-> Vector Foreign Scan on public.hotel1000
Output: public.hotel1000.xm, public.hotel1000.ldmc, public.hotel1000.rzsj, public.hotel1000.lddm, public.hotel1000.zjhm
Filter: (public.hotel1000.xm ~~ 'wang%'::text)
Pushdown Predicate Filter: ((public.hotel1000.rzsj >= '2014-01-01 01:00:00'::text) AND (public.hotel1000.rzsj <= '2014-12-30 23:00:00'::text))
Server Type: hdfs
Orc File: /user/hive/warehouse/fvt_data_query_hive.db/hotel_1000
-> Vector Materialize
Output: public.hotel1000.lddm
-> Vector Streaming(type: BROADCAST)
Output: public.hotel1000.lddm
-> Vector Foreign Scan on public.hotel1000
Output: public.hotel1000.lddm
Filter: (public.hotel1000.ldmc ~~ '%shangwu%'::text)
Server Type: hdfs
Orc File: /user/hive/warehouse/fvt_data_query_hive.db/hotel_1000
(31 rows)
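-- In the plan above, the range conditions on rzsj are handled as a "Pushdown
-- Predicate Filter" by the ORC foreign scan, while the LIKE predicates remain
-- ordinary Filters. A minimal commented sketch of a query whose qual should be
-- fully pushable, assuming the same hdfs_server setup:
-- explain (verbose on, costs off)
-- select xm from hotel1000 where rzsj >= '2014-01-01 01:00:00';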
--add LLT
drop table if exists testllt001;
NOTICE: table "testllt001" does not exist, skipping
create table testllt001(id int, name text) distribute by replication;
explain (verbose on, costs off) select * from t where c1 in (select id from testllt001 where id =2);
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
--------------------------------------------------------------------------
Nested Loop Semi Join
Output: t.c1, t.c2
-> Data Node Scan on t "_REMOTE_TABLE_QUERY_"
Output: t.c1, t.c2
Remote query: SELECT c1, c2 FROM ONLY public.t WHERE c1 = 2
-> Data Node Scan on testllt001 "_REMOTE_TABLE_QUERY_"
Output: testllt001.id
Remote query: SELECT id FROM ONLY public.testllt001 WHERE id = 2
(8 rows)
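-- Note that the constant qual inside the IN-subquery (id = 2) is also shipped
-- to the outer scan of t as c1 = 2 in the plan above. A minimal commented
-- sketch of the equivalent EXISTS form of the same query:
-- explain (verbose on, costs off)
-- select * from t
-- where exists (select 1 from testllt001 where id = 2 and id = t.c1);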
drop table if exists testllt003;
NOTICE: table "testllt003" does not exist, skipping
create table testllt003( id int);
explain (verbose on, costs off) select * from testllt003 union select R_REGIONKEY from region;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
-----------------------------------------------------------------------------------
HashAggregate
Output: testllt003.id
Group By Key: testllt003.id
-> Append
-> Data Node Scan on testllt003 "_REMOTE_TABLE_QUERY_"
Output: testllt003.id
Remote query: SELECT id FROM ONLY public.testllt003 WHERE true
-> Data Node Scan on region "_REMOTE_TABLE_QUERY_"
Output: region.r_regionkey
Remote query: SELECT r_regionkey FROM ONLY public.region WHERE true
(10 rows)
drop table testllt001;
drop table testllt003;
drop table if exists testllt100t;
NOTICE: table "testllt100t" does not exist, skipping
create table testllt100t(c1 int, c2 int) distribute by roundrobin ;
create index testllt100t_index on testllt100t (c1);
drop table if exists testllt100s;
NOTICE: table "testllt100s" does not exist, skipping
create table testllt100s (b1 int, b2 int, b3 int) ;
create index testllt100s_index on testllt100s (b1);
explain (verbose on, costs off) select c1 from testllt100t where c1 in (select b1 from testllt100s where b1=7);
WARNING: Statistics in some tables or columns(public.testllt100t.c1, public.testllt100s.b1) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
---------------------------------------------------------------------------
Nested Loop Semi Join
Output: testllt100t.c1
-> Data Node Scan on testllt100t "_REMOTE_TABLE_QUERY_"
Output: testllt100t.c1
Remote query: SELECT c1 FROM ONLY public.testllt100t WHERE c1 = 7
-> Data Node Scan on testllt100s "_REMOTE_TABLE_QUERY_"
Output: testllt100s.b1
Remote query: SELECT b1 FROM ONLY public.testllt100s WHERE b1 = 7
(8 rows)
explain (verbose on, costs off) select c1 from testllt100t where c1 in (select b2 from testllt100s where b1=7);
WARNING: Statistics in some tables or columns(public.testllt100t.c1, public.testllt100s.b1, public.testllt100s.b2) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
---------------------------------------------------------------------------------
Hash Semi Join
Output: testllt100t.c1
Hash Cond: (testllt100t.c1 = testllt100s.b2)
-> Data Node Scan on testllt100t "_REMOTE_TABLE_QUERY_"
Output: testllt100t.c1
Remote query: SELECT c1 FROM ONLY public.testllt100t WHERE true
-> Hash
Output: testllt100s.b2
-> Data Node Scan on testllt100s "_REMOTE_TABLE_QUERY_"
Output: testllt100s.b2
Remote query: SELECT b2 FROM ONLY public.testllt100s WHERE b1 = 7
(11 rows)
drop table if exists testllt100t;
create table testllt100t(c1 int, c2 int) with (orientation=column) distribute by roundrobin ;
drop table if exists testllt100s;
create table testllt100s (b1 int, b2 int, b3 int) with (orientation=column);
explain (verbose on, costs off) select c1 from testllt100t where c1 in (select b1 from testllt100s where b1=7);
WARNING: Statistics in some tables or columns(public.testllt100t.c1, public.testllt100s.b1) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
---------------------------------------------------------------------------
Nested Loop Semi Join
Output: testllt100t.c1
-> Data Node Scan on testllt100t "_REMOTE_TABLE_QUERY_"
Output: testllt100t.c1
Remote query: SELECT c1 FROM ONLY public.testllt100t WHERE c1 = 7
-> Data Node Scan on testllt100s "_REMOTE_TABLE_QUERY_"
Output: testllt100s.b1
Remote query: SELECT b1 FROM ONLY public.testllt100s WHERE b1 = 7
(8 rows)
create index testllt100t_index on testllt100t (c1);
create index testllt100s_index on testllt100s (b1);
explain (verbose on, costs off) select c1 from testllt100t where c1 in (select b1 from testllt100s where b1=7);
WARNING: Statistics in some tables or columns(public.testllt100t.c1, public.testllt100s.b1) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
---------------------------------------------------------------------------
Nested Loop Semi Join
Output: testllt100t.c1
-> Data Node Scan on testllt100t "_REMOTE_TABLE_QUERY_"
Output: testllt100t.c1
Remote query: SELECT c1 FROM ONLY public.testllt100t WHERE c1 = 7
-> Data Node Scan on testllt100s "_REMOTE_TABLE_QUERY_"
Output: testllt100s.b1
Remote query: SELECT b1 FROM ONLY public.testllt100s WHERE b1 = 7
(8 rows)
drop table testllt100t;
drop table testllt100s;
create foreign table SUBQUERY_T1(
D_ID int,
D_W_ID int,
D_NAME varchar(10),
D_STREET_1 varchar(20))SERVER hdfs_server
OPTIONS(format 'orc', foldername '/user/hive/warehouse/hive/subquery_orc_t1')
distribute by roundrobin;
explain (verbose on, costs off) WITH tmp1 as (select D_ID from SUBQUERY_T1 order by D_ID)SELECT DISTINCT * FROM ( SELECT ALL * FROM SUBQUERY_T1 where D_ID = D_W_ID group by D_ID,D_NAME,D_W_ID,D_STREET_1 having D_ID < 10 order by D_ID,D_NAME,D_W_ID,D_STREET_1 DESC NULLS FIRST LIMIT 1 ) group by D_ID,D_NAME,D_W_ID,D_STREET_1 order by D_ID,D_NAME,D_W_ID,D_STREET_1 FETCH NEXT 20 ROWS ONLY ;
WARNING: Statistics in some tables or columns(public.subquery_t1.d_id, public.subquery_t1.d_w_id, public.subquery_t1.d_name, public.subquery_t1.d_street_1) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Row Adapter
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
-> Vector Limit
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
-> Vector Unique
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
-> Vector Group
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Group By Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Sort
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Sort Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Streaming (type: GATHER)
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
-> Vector Group
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Group By Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Sort
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Sort Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Limit
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
-> Vector Streaming(type: BROADCAST)
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Merge Sort Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_street_1 DESC
-> Vector Limit
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
-> Vector Group
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Group By Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Sort
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Sort Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1 DESC
-> Vector Streaming(type: REDISTRIBUTE)
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Distribute Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Group
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Group By Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1
-> Vector Sort
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Sort Key: subquery_t1.d_id, subquery_t1.d_name, subquery_t1.d_w_id, subquery_t1.d_street_1 DESC
-> Vector Foreign Scan on public.subquery_t1
Output: subquery_t1.d_id, subquery_t1.d_w_id, subquery_t1.d_name, subquery_t1.d_street_1
Filter: (subquery_t1.d_id = subquery_t1.d_w_id)
Pushdown Predicate Filter: (subquery_t1.d_id < 10)
Server Type: hdfs
Orc File: /user/hive/warehouse/hive/subquery_orc_t1
(48 rows)
drop foreign table SUBQUERY_T1;
create table test_mergeappend_1(a integer);
create table test_mergeappend_2(a integer);
explain (verbose on, costs off)(select * from test_mergeappend_1 order by a) union all (select * from test_mergeappend_2 order by 1) order by 1;
WARNING: Statistics in some tables or columns(public.test_mergeappend_1.a, public.test_mergeappend_2.a) are not collected.
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
----------------------------------------------------------------
Streaming (type: GATHER)
Output: test_mergeappend_1.a
Merge Sort Key: test_mergeappend_1.a
-> Sort
Output: test_mergeappend_1.a
Sort Key: test_mergeappend_1.a
-> Result
Output: test_mergeappend_1.a
-> Append
-> Seq Scan on public.test_mergeappend_1
Output: test_mergeappend_1.a
Distribute Key: test_mergeappend_1.a
-> Seq Scan on public.test_mergeappend_2
Output: test_mergeappend_2.a
Distribute Key: test_mergeappend_2.a
(15 rows)
drop table test_mergeappend_1;
drop table test_mergeappend_2;
create table t_subqueryscan (a int, b int);
explain (verbose on, costs off) select a from (select count(a) as a, count(b) as b from t_subqueryscan) order by 1;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
------------------------------------------------------------------------------------------------
Streaming (type: GATHER)
Output: __unnamed_subquery__.a
-> Sort
Output: __unnamed_subquery__.a
Sort Key: __unnamed_subquery__.a
-> Subquery Scan on __unnamed_subquery__
Output: __unnamed_subquery__.a
-> Aggregate
Output: count((count(t_subqueryscan.a))), count((count(t_subqueryscan.b)))
-> Streaming(type: BROADCAST)
Output: (count(t_subqueryscan.a)), (count(t_subqueryscan.b))
-> Aggregate
Output: count(t_subqueryscan.a), count(t_subqueryscan.b)
-> Seq Scan on public.t_subqueryscan
Output: t_subqueryscan.a, t_subqueryscan.b
Distribute Key: t_subqueryscan.a
(16 rows)
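-- The Subquery Scan above projects only column a; count(b) is still computed
-- by the inner Aggregate but discarded. A minimal commented sketch asking for
-- just that value directly:
-- select count(a) as a from t_subqueryscan order by 1;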
drop table t_subqueryscan;
--end LLT
create table test200(id200 int ) distribute by roundrobin;
----------------
on
(1 row)
explain (verbose on, costs off)select * from (select id200 as id from test200 ) y, (select id200 from test200)x;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
Output: public.test200.id200, public.test200.id200
-> Data Node Scan on test200 "_REMOTE_TABLE_QUERY_"
Output: public.test200.id200
Remote query: SELECT id200 FROM ONLY public.test200 WHERE true
-> Data Node Scan on test200 "_REMOTE_TABLE_QUERY_"
Output: public.test200.id200
Remote query: SELECT id200 FROM ONLY public.test200 WHERE true
(8 rows)
create view test200_view as select * from test200;
explain (verbose on, costs off)select * from (select id200 as id from test200_view ) y, (select id200 from test200_view)x;
--?.*
HINT: Do analyze for them in order to generate optimized plan.
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
Output: public.test200.id200, public.test200.id200
-> Data Node Scan on test200 "_REMOTE_TABLE_QUERY_"
Output: public.test200.id200
Remote query: SELECT id200 FROM ONLY public.test200 WHERE true
-> Data Node Scan on test200 "_REMOTE_TABLE_QUERY_"
Output: public.test200.id200
Remote query: SELECT id200 FROM ONLY public.test200 WHERE true
(8 rows)
drop table test200 cascade;
NOTICE: drop cascades to view test200_view
drop foreign table if exists reason;
NOTICE: foreign table "reason" does not exist, skipping
drop foreign table if exists store_sales;
NOTICE: foreign table "store_sales" does not exist, skipping
create foreign table reason (
r_reason_sk int ,
r_reason_id char(16) ,
r_reason_desc char(100) )
server hdfs_server OPTIONS(format 'orc', foldername '/user/hive/warehouse/reason')
distribute by roundrobin;
create foreign table store_sales (
ss_sold_date_sk int ,
ss_sold_time_sk int ,
ss_item_sk int ,
ss_customer_sk int ,
ss_cdemo_sk int ,
ss_hdemo_sk int ,
ss_addr_sk int ,
ss_store_sk int ,
ss_promo_sk int ,
ss_ticket_number int ,
ss_quantity int ,
ss_wholesale_cost decimal(7,2) ,
ss_list_price decimal(7,2) ,
ss_sales_price decimal(7,2) ,
ss_ext_discount_amt decimal(7,2) ,
ss_ext_sales_price decimal(7,2) ,
ss_ext_wholesale_cost decimal(7,2) ,
ss_ext_list_price decimal(7,2) ,
ss_ext_tax decimal(7,2) ,
ss_coupon_amt decimal(7,2) ,
ss_net_paid decimal(7,2) ,
ss_net_paid_inc_tax decimal(7,2) ,
ss_net_profit decimal(7,2) )
server hdfs_server OPTIONS(format 'orc', foldername '/user/hive/warehouse/store_sales')
distribute by roundrobin;
select case when (select count(*)
from store_sales
where ss_quantity between 1 and 20) > 0.001*74129
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 1 and 20)
else (select avg(ss_net_paid)
from store_sales
where ss_quantity between 1 and 20) end bucket1
from reason
where r_reason_sk = 1
limit 100;
bucket1
---------------------
39.6454131784259568
(1 row)
drop foreign table if exists reason;
drop foreign table if exists store_sales;
--
--Drop table
--
drop foreign table d_bigrow;
drop foreign table customer_reviews;
drop foreign table hotel1000;
drop table t;
drop table s;
drop table r;
drop table customer;
drop table lineitem;
drop table part;
drop table partsupp;
drop table orders;
drop table supplier;
drop table nation;
drop table region;
DROP SERVER hdfs_server CASCADE;