// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. suite("test_array_map") { sql """ drop table if exists mock_table; """ sql """ CREATE ALIAS FUNCTION clean_html_entity_test(string) WITH PARAMETER(html) AS REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE( REPLACE(html, '&', '&'), '<', '<' ), '>', '>' ), '"', '"' ), ''', '\\\'' ),'€', '€' ), ' ', ' ' ), "Ⅰ", "I"), "Ⅱ", "II"), "Ⅲ", "III"),".", ". "); """ sql """ CREATE ALIAS FUNCTION clean_html_tag_test(string) WITH PARAMETER(html) AS REGEXP_REPLACE(html, ']+>', ''); """ sql """ CREATE TABLE `mock_table` ( `aa` varchar(255) NULL, `ab` varchar(255) NULL, `ac` varchar(255) NULL, `ad` text NULL, `ae` text NULL, `af` text NULL, `ag` text NULL, `ah` text NULL, `ai` text NULL, `aj` varchar(255) NULL, `ak` text NULL, `al` text NULL, `am` text NULL, `an` text NULL, `ao` text NULL, `ap` text NULL, `aq` text NULL, `ar` text NULL, `as` text NULL, `at` text NULL, `au` text NULL, `av` bigint NULL, `aw` text NULL, `ax` varchar(255) NULL, `ay` text NULL, `az` varchar(255) NULL, `ba` varchar(255) NULL, `bb` varchar(255) NULL, `bc` int NULL, `bd` int NULL, `be` varchar(255) NULL, `bf` varchar(255) NULL, `bg` array NULL, `bh` json NULL, `bi` varchar(255) NULL, `bj` varchar(255) NULL, `bk` array NULL, `bl` boolean NULL, INDEX idx_ag (`ag`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_ad (`ad`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_ah (`ah`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_ac (`ac`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_ak (`ak`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_al (`al`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_am (`am`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_ag_ngrambf (`ag`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"), INDEX idx_ad_ngrambf (`ad`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"), INDEX idx_ac_ngrambf (`ac`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"), INDEX idx_ah_ngrambf (`ah`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"), INDEX idx_bi (`bi`) USING INVERTED, INDEX idx_ar (`ar`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"), INDEX idx_ar_ngrambf (`ar`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"), INDEX idx_bl (`bl`) USING INVERTED ) ENGINE=OLAP UNIQUE KEY(`aa`) DISTRIBUTED BY HASH(`aa`) BUCKETS 16 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "min_load_replica_num" = "-1", "is_being_synced" = "false", "storage_medium" = "hdd", "storage_format" = "V2", "inverted_index_storage_format" = "V1", "enable_unique_key_merge_on_write" = "true", "light_schema_change" = "true", "disable_auto_compaction" = "false", "enable_single_replica_compaction" = "false", "group_commit_interval_ms" = "10000", "group_commit_data_bytes" = "134217728", "enable_mow_light_delete" = "false" ); """ sql """ CREATE VIEW `mock_view` AS WITH bm AS (SELECT `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, CASE WHEN YEAR(`as`) >= 1970 THEN `as` ELSE NULL END as `bn`, CASE WHEN YEAR(`au`) >= 1970 THEN `au` ELSE NULL END as `bo`, CASE WHEN YEAR(`at`) >= 1970 THEN `at` ELSE NULL END as `bp`, LENGTH(`aw`) as `bq`, TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))) as `br`, TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))) as `bs`, ARRAY_MAP(x-> if(least((left(x, 5) = '6841-'), (length(x) = 10)), concat_ws('-', substring(x, 1, 7), substring(x, 8)), if(least((left(x, 5) = '6841-'), (length(x) = 9)), concat_ws('-', substring(x, 1, 6), substring(x, 7)), x)), `bk`) as `bt`, ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bu`, ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bv` FROM mock_table), bw AS (SELECT `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`, CASE WHEN LENGTH(`bn`) = 10 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd') WHEN LENGTH(`bn`) = 19 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss') WHEN LENGTH(`bn`) = 26 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss.SSSSSS') ELSE NULL END AS `bx`, CASE WHEN LENGTH(`bo`) = 10 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd') WHEN LENGTH(`bo`) = 19 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss') WHEN LENGTH(`bo`) = 26 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss.SSSSSS') ELSE NULL END AS `by`, CASE WHEN LENGTH(`bp`) = 10 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd') WHEN LENGTH(`bp`) = 19 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss') WHEN LENGTH(`bp`) = 26 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss.SSSSSS') ELSE NULL END AS `bz`, ARRAY_REMOVE( ARRAY_COMPACT( ARRAY_UNION( ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -1), '-'), `bt`), ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -2), '-'), `bt`), ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -3), '-'), `bt`))), '') as `ca`, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ') as `cb`, SPLIT_BY_STRING(`bu`, ' ') as `cc`, array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `cd`, array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `ce`, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ') as `cf`, SPLIT_BY_STRING(`bv`, ' ') as `cg`, array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ch`, array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ci` FROM bm), cj AS (SELECT `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`, `bx`, `by`, `bz`, `ca`, `cb`, `cc`, `cd`, `ce`, `cf`, `cg`, `ch`, `ci`, ARRAY_COMPACT(ARRAY_EXCEPT(`bt`, `ca`)) as `ck`, ARRAY_COMPACT(ARRAY_UNION(`bt`, `ca`)) as `cl`, CASE WHEN SIZE(`cc`) = 0 THEN `bs` WHEN `cd`=1 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ") WHEN `cd`=2 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ") WHEN `cd` >2 AND `ce` = size(`cb`) and `ce` - `cd` > 1 THEN CASE WHEN element_at(`cc`, 1) = element_at(`cc`, `cd`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`-1), "") ELSE ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`), " ") END ELSE ARRAY_JOIN(`cc`, " ") END AS `cm`, CASE WHEN size(`cc`) = 0 THEN "tokenize_failed" WHEN `cd` = 0 THEN "en" WHEN `cd`=1 AND `ce` = size(`cb`) THEN "zh" WHEN `cd`=1 AND `ce` < size(`cb`) THEN "zh_en" WHEN `cd`=2 AND `ce` < size(`cb`) THEN "zh_en" WHEN `cd` >2 AND `ce` = size(`cb`) THEN "en_zh" ELSE "mixed" END AS `cn`, CASE WHEN SIZE(`cg`) = 0 THEN `br` WHEN `ch`=1 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ") WHEN `ch`=2 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ") WHEN `ch` >2 AND `ci` = size(`cf`) and `ci` - `ch` > 1 THEN CASE WHEN element_at(`cg`, 1) = element_at(`cg`, `ch`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`-1), "") ELSE ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`), " ") END ELSE ARRAY_JOIN(`cg`, " ") END AS `co`, CASE WHEN size(`cg`) = 0 THEN "tokenize_failed" WHEN `ch` = 0 THEN "en" WHEN `ch`=1 AND `ci` = size(`cf`) THEN "zh" WHEN `ch`=1 AND `ci` < size(`cf`) THEN "zh_en" WHEN `ch`=2 AND `ci` < size(`cf`) THEN "zh_en" WHEN `ch` >2 AND `ci` = size(`cf`) THEN "en_zh" ELSE "mixed" END AS `cp` FROM bw) SELECT * FROM cj; """ sql """ insert into mock_table(aa, ab,ac,ad) values('1','2','3','4'); """ qt_sql """ SELECT * FROM mock_view LIMIT 530000,1000; """ }