diff --git a/docker/thirdparties/docker-compose/hive/gen_env.sh b/docker/thirdparties/docker-compose/hive/gen_env.sh.tpl
similarity index 94%
rename from docker/thirdparties/docker-compose/hive/gen_env.sh
rename to docker/thirdparties/docker-compose/hive/gen_env.sh.tpl
index 2a81769a73..483c0e54a1 100755
--- a/docker/thirdparties/docker-compose/hive/gen_env.sh
+++ b/docker/thirdparties/docker-compose/hive/gen_env.sh.tpl
@@ -30,11 +30,11 @@ HMS_PORT=9183
 cp "${ROOT}"/hadoop-hive.env.tpl "${ROOT}"/hadoop-hive.env
 # Need to set hostname of container to same as host machine's.
 # Otherwise, the doris process can not connect to namenode directly.
-HOST_NAME=$(hostname)
+HOST_NAME="doris--"
 {
     echo "FS_PORT=${FS_PORT}"
     echo "HMS_PORT=${HMS_PORT}"
-    echo "CORE_CONF_fs_defaultFS=hdfs://${HOST_NAME}:${FS_PORT}"
+    echo "CORE_CONF_fs_defaultFS=hdfs://doris--namenode:${FS_PORT}"
     echo "HOST_NAME=${HOST_NAME}"
 } >>"${ROOT}"/hadoop-hive.env
diff --git a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl
index a05d57a8b7..887564bd55 100644
--- a/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl
+++ b/docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
-HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://doris--hive-metastore-postgresql/metastore
+HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore
 HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
 HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
 HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
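
For orientation, this is roughly the tail that gen_env.sh appends to hadoop-hive.env; the FS_PORT value below is only an assumed example, while HMS_PORT=9183 and the variable names come from the script above:

    # hypothetical tail of hadoop-hive.env produced by gen_env.sh (FS_PORT=8120 is assumed)
    FS_PORT=8120
    HMS_PORT=9183
    CORE_CONF_fs_defaultFS=hdfs://doris--namenode:8120
    HOST_NAME=doris--
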
diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
index 028f3212a5..93ced99ac3 100644
--- a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
@@ -15,8 +15,13 @@
 # limitations under the License.
 #
+
 version: "3.8"
+
+networks:
+  doris--network:
+    driver: bridge
 
 services:
   doris--namenode:
     image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
@@ -24,28 +29,31 @@ services:
       - CLUSTER_NAME=test
     env_file:
       - ./hadoop-hive.env
-    hostname: ${HOST_NAME}
+    hostname: doris--namenode
+    container_name: doris--namenode
     expose:
       - "50070"
+      - "8020"
+      - "9000"
       - "${FS_PORT}"
     ports:
-      - target: ${FS_PORT}
-        published: ${FS_PORT}
-        protocol: tcp
-        mode: host
+      - "${FS_PORT}:${FS_PORT}"
     healthcheck:
       test: [ "CMD", "curl", "http://localhost:50070/" ]
       interval: 5s
-      timeout: 60s
+      timeout: 120s
       retries: 120
     networks:
-      - doris--hive
+      - doris--network
+
   doris--datanode:
     image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
     env_file:
       - ./hadoop-hive.env
     environment:
       SERVICE_PRECONDITION: "doris--namenode:50070"
+    hostname: doris--datanode
+    container_name: doris--datanode
     expose:
       - "50075"
     healthcheck:
@@ -54,7 +62,8 @@
       timeout: 60s
       retries: 120
     networks:
-      - doris--hive
+      - doris--network
+
   doris--hive-server:
     image: bde2020/hive:2.3.2-postgresql-metastore
     env_file:
@@ -62,45 +71,48 @@
     environment:
       HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore"
       SERVICE_PRECONDITION: "doris--hive-metastore:9083"
+    hostname: doris--hive-server
+    container_name: doris--hive-server
     expose:
       - "10000"
     depends_on:
-      doris--datanode:
-        condition: service_healthy
-      doris--namenode:
-        condition: service_healthy
+      - doris--datanode
+      - doris--namenode
     healthcheck:
       test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
-      interval: 5s
-      timeout: 60s
+      interval: 10s
+      timeout: 120s
       retries: 120
     networks:
-      - doris--hive
+      - doris--network
+
+
   doris--hive-metastore:
     image: bde2020/hive:2.3.2-postgresql-metastore
     env_file:
       - ./hadoop-hive.env
-    command: ["sh","-c","/mnt/scripts/hive-metastore.sh"]
+    command: /bin/bash /mnt/scripts/hive-metastore.sh
+    # command: /opt/hive/bin/hive --service metastore
     environment:
       SERVICE_PRECONDITION: "doris--namenode:50070 doris--datanode:50075 doris--hive-metastore-postgresql:5432"
+    hostname: doris--hive-metastore
+    container_name: doris--hive-metastore
     expose:
      - "9083"
     ports:
-      - ${HMS_PORT}:9083
+      - "${HMS_PORT}:9083"
     volumes:
       - ./scripts:/mnt/scripts
     depends_on:
-      doris--hive-metastore-postgresql:
-        condition: service_healthy
-    healthcheck:
-      test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"]
-      interval: 5s
-      timeout: 60s
-      retries: 120
+      - doris--hive-metastore-postgresql
     networks:
-      - doris--hive
+      - doris--network
+
   doris--hive-metastore-postgresql:
     image: bde2020/hive-metastore-postgresql:2.3.0
+    restart: always
+    hostname: doris--hive-metastore-postgresql
+    container_name: doris--hive-metastore-postgresql
     expose:
       - "5432"
     healthcheck:
@@ -109,14 +121,4 @@
       timeout: 60s
       retries: 120
     networks:
-      - doris--hive
-  hello-world:
-    image: hello-world
-    depends_on:
-      doris--hive-metastore:
-        condition: service_healthy
-    networks:
-      - doris--hive
-
-networks:
-  doris--hive:
+      - doris--network
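
Once the compose file has been rendered and started (see run-thirdparties-docker.sh further down), a quick way to confirm the fixed container names and the namenode healthcheck endpoint is something like the following; this is only a sketch, and in practice the doris-- prefix has already been replaced by ${CONTAINER_UID}:

    # list the hive stack and probe the namenode web UI used by the healthcheck above
    sudo docker-compose -f docker-compose/hive/hive-2x.yaml --env-file docker-compose/hive/hadoop-hive.env ps
    sudo docker exec doris--namenode curl -sf http://localhost:50070/ >/dev/null && echo "namenode is up"
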
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 8bb9f456b8..0f0d8f19d2 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -31,6 +31,290 @@ TBLPROPERTIES (
 
 msck repair table partition_table;
 
+
+CREATE TABLE `delta_byte_array`(
+  `c_salutation` string,
+  `c_first_name` string,
+  `c_last_name` string,
+  `c_preferred_cust_flag` string,
+  `c_birth_country` string,
+  `c_login` string,
+  `c_email_address` string,
+  `c_last_review_date` string,
+  `c_customer_id` string
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_byte_array'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+
+CREATE TABLE `delta_length_byte_array`(
+  `FRUIT` string
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_length_byte_array'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table delta_length_byte_array;
+
+CREATE EXTERNAL TABLE `delta_binary_packed`(
+  bitwidth0 bigint,
+  bitwidth1 bigint,
+  bitwidth2 bigint,
+  bitwidth3 bigint,
+  bitwidth4 bigint,
+  bitwidth5 bigint,
+  bitwidth6 bigint,
+  bitwidth7 bigint,
+  bitwidth8 bigint,
+  bitwidth9 bigint,
+  bitwidth10 bigint,
+  bitwidth11 bigint,
+  bitwidth12 bigint,
+  bitwidth13 bigint,
+  bitwidth14 bigint,
+  bitwidth15 bigint,
+  bitwidth16 bigint,
+  bitwidth17 bigint,
+  bitwidth18 bigint,
+  bitwidth19 bigint,
+  bitwidth20 bigint,
+  bitwidth21 bigint,
+  bitwidth22 bigint,
+  bitwidth23 bigint,
+  bitwidth24 bigint,
+  bitwidth25 bigint,
+  bitwidth26 bigint,
+  bitwidth27 bigint,
+  bitwidth28 bigint,
+  bitwidth29 bigint,
+  bitwidth30 bigint,
+  bitwidth31 bigint,
+  bitwidth32 bigint,
+  bitwidth33 bigint,
+  bitwidth34 bigint,
+  bitwidth35 bigint,
+  bitwidth36 bigint,
+  bitwidth37 bigint,
+  bitwidth38 bigint,
+  bitwidth39 bigint,
+  bitwidth40 bigint,
+  bitwidth41 bigint,
+  bitwidth42 bigint,
+  bitwidth43 bigint,
+  bitwidth44 bigint,
+  bitwidth45 bigint,
+  bitwidth46 bigint,
+  bitwidth47 bigint,
+  bitwidth48 bigint,
+  bitwidth49 bigint,
+  bitwidth50 bigint,
+  bitwidth51 bigint,
+  bitwidth52 bigint,
+  bitwidth53 bigint,
+  bitwidth54 bigint,
+  bitwidth55 bigint,
+  bitwidth56 bigint,
+  bitwidth57 bigint,
+  bitwidth58 bigint,
+  bitwidth59 bigint,
+  bitwidth60 bigint,
+  bitwidth61 bigint,
+  bitwidth62 bigint,
+  bitwidth63 bigint,
+  bitwidth64 bigint,
+  int_value int
+  )
+STORED AS parquet
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_binary_packed'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table delta_binary_packed;
+
+
+CREATE TABLE `delta_encoding_required_column`(
+  c_customer_sk int,
+  c_current_cdemo_sk int,
+  c_current_hdemo_sk int,
+  c_current_addr_sk int,
+  c_first_shipto_date_sk int,
+  c_first_sales_date_sk int,
+  c_birth_day int,
+  c_birth_month int,
+  c_birth_year int,
+  c_customer_id string,
+  c_salutation string,
+  c_first_name string,
+  c_last_name string,
+  c_preferred_cust_flag string,
+  c_birth_country string,
+  c_email_address string,
+  c_last_review_date string
+  )
+STORED AS parquet;
+
+load data inpath '/user/doris/preinstalled_data/different_types_parquet/delta_encoding_required_column/delta_encoding_required_column.parquet' into table default.delta_encoding_required_column;
+
+msck repair table delta_encoding_required_column;
+
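
These tables can be spot-checked from inside the hive-server container with the same beeline connection the compose healthcheck uses; a sketch, with the table picked from the DDL above:

    # count rows in one of the preinstalled parquet tables through HiveServer2
    sudo docker exec doris--hive-server beeline -u "jdbc:hive2://127.0.0.1:10000/default" \
        -n health_check -e "select count(*) from delta_binary_packed;"
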
+
+CREATE EXTERNAL TABLE `delta_encoding_optional_column`(
+  c_customer_sk int,
+  c_current_cdemo_sk int,
+  c_current_hdemo_sk int,
+  c_current_addr_sk int,
+  c_first_shipto_date_sk int,
+  c_first_sales_date_sk int,
+  c_birth_year int,
+  c_customer_id string,
+  c_salutation string,
+  c_first_name string,
+  c_last_name string,
+  c_preferred_cust_flag string,
+  c_birth_country string
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_encoding_optional_column'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table delta_encoding_optional_column;
+
+
+CREATE TABLE `datapage_v1_snappy_compressed_checksum`(
+  `a` int,
+  `b` int
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/datapage_v1-snappy-compressed-checksum'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table datapage_v1_snappy_compressed_checksum;
+
+
+CREATE TABLE `overflow_i16_page_cnt`(
+  `inc` boolean
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/overflow_i16_page_cnt'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table overflow_i16_page_cnt;
+
+
+CREATE TABLE `alltypes_tiny_pages`(
+  bool_col boolean,
+  tinyint_col int,
+  smallint_col int,
+  int_col int,
+  bigint_col bigint,
+  float_col float,
+  double_col double,
+  id int,
+  date_string_col string,
+  string_col string,
+  timestamp_col timestamp,
+  year int,
+  month int
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/alltypes_tiny_pages'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table alltypes_tiny_pages;
+
+
+CREATE TABLE `alltypes_tiny_pages_plain`(
+  bool_col boolean,
+  tinyint_col int,
+  smallint_col int,
+  int_col int,
+  bigint_col bigint,
+  float_col float,
+  double_col double,
+  id int,
+  date_string_col string,
+  string_col string,
+  timestamp_col timestamp,
+  year int,
+  month int
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/alltypes_tiny_pages_plain'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table alltypes_tiny_pages_plain;
+
+CREATE TABLE `example_string`(
+  `strings` string
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='\t',
+  'serialization.format'='\t')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/example_string.parquet'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table example_string;
+
+
 CREATE EXTERNAL TABLE IF NOT EXISTS `orc_all_types`(
   `tinyint_col` tinyint,
   `smallint_col` smallint,
@@ -75,23 +359,23 @@ LOCATION
 '/user/doris/preinstalled_data/data_case/student'
 TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
 CREATE TABLE `lineorder` (
-  `lo_orderkey` int, 
+  `lo_orderkey` int,
   `lo_linenumber` int,
-  `lo_custkey` int, 
-  `lo_partkey` int, 
-  `lo_suppkey` int, 
-  `lo_orderdate` int, 
+  `lo_custkey` int,
+  `lo_partkey` int,
+  `lo_suppkey` int,
+  `lo_orderdate` int,
   `lo_orderpriority` varchar(16),
-  `lo_shippriority` int, 
-  `lo_quantity` int, 
-  `lo_extendedprice` int, 
-  `lo_ordtotalprice` int, 
+  `lo_shippriority` int,
+  `lo_quantity` int,
+  `lo_extendedprice` int,
+  `lo_ordtotalprice` int,
   `lo_discount` int,
   `lo_revenue` int,
-  `lo_supplycost` int, 
-  `lo_tax` int, 
-  `lo_commitdate` int, 
-  `lo_shipmode` varchar(11) 
+  `lo_supplycost` int,
+  `lo_tax` int,
+  `lo_commitdate` int,
+  `lo_shipmode` varchar(11)
 )
 ROW FORMAT DELIMITED FIELDS TERMINATED by ','
 LOCATION '/user/doris/preinstalled_data/data_case/lineorder'
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index b23485dbab..d500944a24 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -17,10 +17,20 @@
 # under the License.
 
 nohup /opt/hive/bin/hive --service metastore &
-sleep 10
+
+# wait for the metastore to start
+sleep 10s
+
+# if you are testing locally, it is better to comment out the tpch1.db section below
 if [[ ! -d "/mnt/scripts/tpch1.db" ]]; then
     echo "/mnt/scripts/tpch1.db does not exist"
     exit 1
+else
+    wget -P /mnt/scripts https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
+    cd /mnt/scripts/
+    tar -zxf tpch1.db.tar.gz
+    rm -rf tpch1.db.tar.gz
+    cd -
 fi
 
 # put data file
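
The fixed sleep above only gives the metastore a head start; a slightly more defensive variant would poll the thrift port before loading data (a sketch, assuming nc is available in the image):

    # wait up to ~60s for the metastore thrift port instead of sleeping blindly
    for i in $(seq 1 60); do
        nc -z 127.0.0.1 9083 && break
        sleep 1
    done
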
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/alltypes_tiny_pages/alltypes_tiny_pages.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/alltypes_tiny_pages/alltypes_tiny_pages.parquet
new file mode 100644
index 0000000000..90019d16b4
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/alltypes_tiny_pages/alltypes_tiny_pages.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/alltypes_tiny_pages_plain/alltypes_tiny_pages_plain.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/alltypes_tiny_pages_plain/alltypes_tiny_pages_plain.parquet
new file mode 100644
index 0000000000..68d4dcb2da
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/alltypes_tiny_pages_plain/alltypes_tiny_pages_plain.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/datapage_v1-snappy-compressed-checksum/datapage_v1-snappy-compressed-checksum.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/datapage_v1-snappy-compressed-checksum/datapage_v1-snappy-compressed-checksum.parquet
new file mode 100644
index 0000000000..8fe2c86ff2
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/datapage_v1-snappy-compressed-checksum/datapage_v1-snappy-compressed-checksum.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_binary_packed/delta_binary_packed.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_binary_packed/delta_binary_packed.parquet
new file mode 100644
index 0000000000..4bb56e90e8
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_binary_packed/delta_binary_packed.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_byte_array/delta_byte_array.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_byte_array/delta_byte_array.parquet
new file mode 100644
index 0000000000..669e81c24a
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_byte_array/delta_byte_array.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_encoding_optional_column/delta_encoding_optional_column.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_encoding_optional_column/delta_encoding_optional_column.parquet
new file mode 100644
index 0000000000..3b06caae2a
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_encoding_optional_column/delta_encoding_optional_column.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_encoding_required_column/delta_encoding_required_column.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_encoding_required_column/delta_encoding_required_column.parquet
new file mode 100644
index 0000000000..5b63b21d73
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_encoding_required_column/delta_encoding_required_column.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_length_byte_array/delta_length_byte_array.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_length_byte_array/delta_length_byte_array.parquet
new file mode 100644
index 0000000000..131c7e4ff9
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/delta_length_byte_array/delta_length_byte_array.parquet differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/overflow_i16_page_cnt/overflow_i16_page_cnt.parquet b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/overflow_i16_page_cnt/overflow_i16_page_cnt.parquet
new file mode 100644
index 0000000000..fe30131ab3
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/different_types_parquet/overflow_i16_page_cnt/overflow_i16_page_cnt.parquet differ
diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh
index bb68a2fe37..53af0d14bd 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -35,7 +35,7 @@ Usage: $0
     -c mysql             start MySQL
     -c mysql,hive        start MySQL and Hive
     --stop               stop the specified components
-    
+
 All valid components:
     mysql,pg,oracle,sqlserver,es,hive,iceberg
 "
@@ -219,14 +219,18 @@ fi
 
 if [[ "${RUN_HIVE}" -eq 1 ]]; then
     # hive
    # before start it, you need to download parquet file package, see "README" in "docker-compose/hive/scripts/"
+    cp "${ROOT}"/docker-compose/hive/gen_env.sh.tpl "${ROOT}"/docker-compose/hive/gen_env.sh
+    sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/gen_env.sh
     cp "${ROOT}"/docker-compose/hive/hive-2x.yaml.tpl "${ROOT}"/docker-compose/hive/hive-2x.yaml
     cp "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl.tpl "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
-    sudo "${ROOT}"/docker-compose/hive/gen_env.sh
-    sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down
+    sudo bash "${ROOT}"/docker-compose/hive/gen_env.sh
+    sudo docker-compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down
+    sudo sed -i "/${CONTAINER_UID}namenode/d" /etc/hosts
     if [[ "${STOP}" -ne 1 ]]; then
-        sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up -d
+        sudo docker-compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up --build --remove-orphans -d
+        echo "127.0.0.1 ${CONTAINER_UID}namenode" | sudo tee -a /etc/hosts
     fi
 fi
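
With these changes the hive stack is still driven through the same entry point as the other components (the flags come from the usage text above); run from docker/thirdparties:

    # start only the hive component, and tear it down again with --stop
    sudo ./run-thirdparties-docker.sh -c hive
    sudo ./run-thirdparties-docker.sh -c hive --stop
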
diff --git a/regression-test/suites/external_catalog_p0/hive/test_different_parquet_types.groovy b/regression-test/suites/external_catalog_p0/hive/test_different_parquet_types.groovy
new file mode 100644
index 0000000000..9c4f969511
--- /dev/null
+++ b/regression-test/suites/external_catalog_p0/hive/test_different_parquet_types.groovy
@@ -0,0 +1,209 @@
+package suites.external_catalog_p0.hive
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_different_parquet_types", "p0") {
+
+    String hms_port = context.config.otherConfigs.get("hms_port")
+    String hdfs_port = context.config.otherConfigs.get("hdfs_port")
+
+    // problem 01: in hive, "select * from delta_byte_array limit 10" returns valid rows, but the same query through doris returns nothing
+    def q01 = {
+        def res1_1 = sql """
+        select * from delta_byte_array limit 10
+        """
+        logger.info("record res" + res1_1.toString())
+
+        def res1_2 = sql """
+        select count(*) from delta_byte_array
+        """
+        logger.info("record res" + res1_2.toString())
+
+        def res1_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_byte_array/delta_byte_array.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res1_3.toString())
+    }
+
+
+    // problem 02: the hive query returns null, the doris catalog query throws an exception, and the tvf query returns null without an exception
+
+    def q03 = {
+
+        // exception info: [INTERNAL_ERROR]Only support csv data in utf8 codec
+        def res3_1 = sql """
+        select * from delta_binary_packed limit 10;
+        """
+        logger.info("record res" + res3_1.toString())
+
+        def res3_2 = sql """
+        select count(*) from delta_binary_packed;
+        """
+        logger.info("record res" + res3_2.toString())
+
+        // returns nothing, but no exception
+        def res3_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_binary_packed/delta_binary_packed.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res3_3.toString())
+    }
+
+    // problem 03: the hive query throws an exception, while the doris query returns nothing
+    def q04 = {
+        def res4_1 = sql """
+        select * from delta_encoding_required_column limit 10;
+        """
+        logger.info("record res" + res4_1.toString())
+
+        def res4_2 = sql """
+        select count(*) from delta_encoding_required_column;
+        """
+        logger.info("record res" + res4_2.toString())
+
+        def res4_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_encoding_required_column/delta_encoding_required_column.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res4_3.toString())
+    }
+
+
+    def q05 = {
+        def res5_1 = sql """
+        select * from delta_encoding_optional_column limit 10;
+        """
+        logger.info("record res" + res5_1.toString())
+
+
+        def res5_2 = sql """
+        select count(*) from delta_encoding_optional_column;
+        """
+        logger.info("record res" + res5_2.toString())
+
+        def res5_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_encoding_optional_column/delta_encoding_optional_column.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res5_3.toString())
+    }
+
+
+    // problem 04: the tvf query throws an exception: Can not get first file, please check uri.
+    def q06 = {
+        def res6_1 = sql """
+        select * from datapage_v1_snappy_compressed_checksum limit 10;
+        """
+        logger.info("record res" + res6_1.toString())
+
+        def res6_2 = sql """
+        select count(*) from datapage_v1_snappy_compressed_checksum;
+        """
+        logger.info("record res" + res6_2.toString())
+
+        def res6_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/datapage_v1_snappy_compressed_checksum/datapage_v1_snappy_compressed_checksum.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res6_3.toString())
+
+    }
+
+    // pass
+    def q07 = {
+        def res7_1 = sql """
+        select * from overflow_i16_page_cnt limit 10;
+        """
+        logger.info("record res" + res7_1.toString())
+
+        def res7_2 = sql """
+        select count(*) from overflow_i16_page_cnt;
+        """
+        logger.info("record res" + res7_2.toString())
+
+        def res7_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/overflow_i16_page_cnt/overflow_i16_page_cnt.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res7_3.toString())
+    }
+
+    // pass
+    def q08 = {
+        def res8_1 = sql """
+        select * from alltypes_tiny_pages limit 10;
+        """
+        logger.info("record res" + res8_1.toString())
+
+
+        def res8_2 = sql """
+        select count(*) from alltypes_tiny_pages limit 10;
+        """
+        logger.info("record res" + res8_2.toString())
+
+        def res8_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/alltypes_tiny_pages/alltypes_tiny_pages.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res8_3.toString())
+    }
+
+    // pass
+    def q09 = {
+        def res9_1 = sql """
+        select * from alltypes_tiny_pages_plain limit 10;
+        """
+        logger.info("record res" + res9_1.toString())
+
+
+        def res9_2 = sql """
+        select count(*) from alltypes_tiny_pages_plain limit 10;
+        """
+        logger.info("record res" + res9_2.toString())
+
+        def res9_3 = sql """
+        select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/alltypes_tiny_pages_plain/alltypes_tiny_pages_plain.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res9_3.toString())
+    }
+
+
+
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        try {
+            String resource_name = "hive_different_parquet_types_resource"
+            String catalog_name = "hive_different_parquet_types"
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """drop resource if exists ${resource_name}"""
+
+            sql """create resource if not exists ${resource_name} properties (
+                "type"="hms",
+                'hive.metastore.uris' = 'thrift://127.0.0.1:${hms_port}'
+            );"""
+            sql """create catalog if not exists ${catalog_name} with resource ${resource_name};"""
+            sql """use `${catalog_name}`.`default`"""
+
+            q01()
+            // q02()
+            q03()
+            q04()
+            q05()
+            q06()
+            q07()
+            q08()
+            q09()
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """drop resource if exists ${resource_name}"""
+        } finally {
+        }
+    }
+}
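
To run the new suite locally, the hive environment above has to be up and the regression config needs enableHiveTest=true plus the hms_port and hdfs_port values the test reads; the invocation below is an assumption about the usual regression-test entry point, not something defined by this patch:

    # bring up hive, then run only this suite from the doris source root (sketch)
    sudo docker/thirdparties/run-thirdparties-docker.sh -c hive
    sh run-regression-test.sh --run test_different_parquet_types
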