From df22344550275c27df9d0640d146d8efe14920a3 Mon Sep 17 00:00:00 2001 From: xzj7019 <131111794+xzj7019@users.noreply.github.com> Date: Wed, 19 Jun 2024 12:23:48 +0800 Subject: [PATCH] [opt](tools) update tools schema (#36114) pick from master #35873 Change the bucket column of the tpcds tools table customer_demographics to its primary key column, to avoid performance issues caused by data skew. --- tools/tpcds-tools/ddl/create-tpcds-tables-sf1.sql | 2 +- tools/tpcds-tools/ddl/create-tpcds-tables-sf100.sql | 2 +- tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql | 2 +- tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/tpcds-tools/ddl/create-tpcds-tables-sf1.sql b/tools/tpcds-tools/ddl/create-tpcds-tables-sf1.sql index c29db69a2d..084ae989a3 100644 --- a/tools/tpcds-tools/ddl/create-tpcds-tables-sf1.sql +++ b/tools/tpcds-tools/ddl/create-tpcds-tables-sf1.sql @@ -27,7 +27,7 @@ CREATE TABLE IF NOT EXISTS customer_demographics ( cd_dep_college_count integer ) DUPLICATE KEY(cd_demo_sk) -DISTRIBUTED BY HASH(cd_gender) BUCKETS 12 +DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 12 PROPERTIES ( "replication_num" = "1" ); diff --git a/tools/tpcds-tools/ddl/create-tpcds-tables-sf100.sql b/tools/tpcds-tools/ddl/create-tpcds-tables-sf100.sql index 6ed0f820ee..eb5edb7b4b 100644 --- a/tools/tpcds-tools/ddl/create-tpcds-tables-sf100.sql +++ b/tools/tpcds-tools/ddl/create-tpcds-tables-sf100.sql @@ -27,7 +27,7 @@ CREATE TABLE IF NOT EXISTS customer_demographics ( cd_dep_college_count integer ) DUPLICATE KEY(cd_demo_sk) -DISTRIBUTED BY HASH(cd_gender) BUCKETS 12 +DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 12 PROPERTIES ( "replication_num" = "1" ); diff --git a/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql b/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql index 5601e15923..fedc7970e2 100644 --- a/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql +++ b/tools/tpcds-tools/ddl/create-tpcds-tables-sf1000.sql @@ -27,7 
+27,7 @@ CREATE TABLE IF NOT EXISTS customer_demographics ( cd_dep_college_count integer ) DUPLICATE KEY(cd_demo_sk) -DISTRIBUTED BY HASH(cd_gender) BUCKETS 12 +DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 12 PROPERTIES ( "replication_num" = "1" ); diff --git a/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql b/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql index 760672922c..20574ca534 100644 --- a/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql +++ b/tools/tpcds-tools/ddl/create-tpcds-tables-sf10000.sql @@ -27,7 +27,7 @@ CREATE TABLE IF NOT EXISTS customer_demographics ( cd_dep_college_count integer ) DUPLICATE KEY(cd_demo_sk) -DISTRIBUTED BY HASH(cd_gender) BUCKETS 12 +DISTRIBUTED BY HASH(cd_demo_sk) BUCKETS 12 PROPERTIES ( "replication_num" = "1" );